// RUN: %clang_cc1 -triple thumbv7s-apple-darwin -target-abi apcs-gnu \
// RUN:  -target-cpu swift -fallow-half-arguments-and-returns \
// RUN:  -target-feature +fullfp16 -ffreestanding \
// RUN:  -flax-vector-conversions=none \
// RUN:  -disable-O0-optnone -emit-llvm -o - %s \
// RUN:  | opt -S -mem2reg | FileCheck %s
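
// This file checks Clang's lowering of the ARM NEON arithmetic and logical
// intrinsics from <arm_neon.h> (the vaba*, vabd*, vabs*, vadd*, vand*, vbic*,
// and vbsl* families) to LLVM IR, after mem2reg has cleaned up the -O0 output.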

#include <arm_neon.h>

// CHECK-LABEL: @test_vaba_s8(
// CHECK:   [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vaba_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vaba_s8(a, b, c);
}

// CHECK-LABEL: @test_vaba_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vaba_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vaba_s16(a, b, c);
}

// CHECK-LABEL: @test_vaba_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vaba_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vaba_s32(a, b, c);
}

// CHECK-LABEL: @test_vaba_u8(
// CHECK:   [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, [[VABD_V_I_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vaba_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vaba_u8(a, b, c);
}

// CHECK-LABEL: @test_vaba_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[VABD_V2_I_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vaba_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vaba_u16(a, b, c);
}

// CHECK-LABEL: @test_vaba_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[VABD_V2_I_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vaba_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vaba_u32(a, b, c);
}

// CHECK-LABEL: @test_vabaq_s8(
// CHECK:   [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %b, <16 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vabaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vabaq_s8(a, b, c);
}

// CHECK-LABEL: @test_vabaq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %b, <8 x i16> %c)
// CHECK:   [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vabaq_s16(a, b, c);
}

// CHECK-LABEL: @test_vabaq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK:   [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %b, <4 x i32> %c)
// CHECK:   [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vabaq_s32(a, b, c);
}

// CHECK-LABEL: @test_vabaq_u8(
// CHECK:   [[VABDQ_V_I_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %b, <16 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, [[VABDQ_V_I_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vabaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vabaq_u8(a, b, c);
}

// CHECK-LABEL: @test_vabaq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VABDQ_V2_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %b, <8 x i16> %c)
// CHECK:   [[VABDQ_V3_I_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VABDQ_V2_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vabaq_u16(a, b, c);
}

// CHECK-LABEL: @test_vabaq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK:   [[VABDQ_V2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %b, <4 x i32> %c)
// CHECK:   [[VABDQ_V3_I_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I_I]] to <16 x i8>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VABDQ_V2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vabaq_u32(a, b, c);
}

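// vabal_* lowers to the corresponding vabd intrinsic followed by a zero
// extend (the absolute difference is always non-negative) and a wide add.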
// CHECK-LABEL: @test_vabal_s8(
// CHECK:   [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vabal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vabal_s8(a, b, c);
}

// CHECK-LABEL: @test_vabal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vabal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vabal_s16(a, b, c);
}

// CHECK-LABEL: @test_vabal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vabal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vabal_s32(a, b, c);
}

// CHECK-LABEL: @test_vabal_u8(
// CHECK:   [[VABD_V_I_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I_I]] to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vabal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vabal_u8(a, b, c);
}

// CHECK-LABEL: @test_vabal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VABD_V3_I_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I_I]] to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vabal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vabal_u16(a, b, c);
}

// CHECK-LABEL: @test_vabal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VABD_V2_I_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VABD_V3_I_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I_I]] to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vabal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vabal_u32(a, b, c);
}

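// vabd_*/vabdq_* map directly onto the llvm.arm.neon.vabds/vabdu intrinsics;
// the float variants use the signed form on float vectors.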
// CHECK-LABEL: @test_vabd_s8(
// CHECK:   [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VABD_V_I]]
int8x8_t test_vabd_s8(int8x8_t a, int8x8_t b) {
  return vabd_s8(a, b);
}

// CHECK-LABEL: @test_vabd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VABD_V2_I]]
int16x4_t test_vabd_s16(int16x4_t a, int16x4_t b) {
  return vabd_s16(a, b);
}

// CHECK-LABEL: @test_vabd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VABD_V2_I]]
int32x2_t test_vabd_s32(int32x2_t a, int32x2_t b) {
  return vabd_s32(a, b);
}

// CHECK-LABEL: @test_vabd_u8(
// CHECK:   [[VABD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VABD_V_I]]
uint8x8_t test_vabd_u8(uint8x8_t a, uint8x8_t b) {
  return vabd_u8(a, b);
}

// CHECK-LABEL: @test_vabd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VABD_V2_I]]
uint16x4_t test_vabd_u16(uint16x4_t a, uint16x4_t b) {
  return vabd_u16(a, b);
}

// CHECK-LABEL: @test_vabd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VABD_V2_I]]
uint32x2_t test_vabd_u32(uint32x2_t a, uint32x2_t b) {
  return vabd_u32(a, b);
}

// CHECK-LABEL: @test_vabd_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VABD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VABD_V3_I:%.*]] = bitcast <2 x float> [[VABD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VABD_V2_I]]
float32x2_t test_vabd_f32(float32x2_t a, float32x2_t b) {
  return vabd_f32(a, b);
}

// CHECK-LABEL: @test_vabdq_s8(
// CHECK:   [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VABDQ_V_I]]
int8x16_t test_vabdq_s8(int8x16_t a, int8x16_t b) {
  return vabdq_s8(a, b);
}

// CHECK-LABEL: @test_vabdq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VABDQ_V2_I]]
int16x8_t test_vabdq_s16(int16x8_t a, int16x8_t b) {
  return vabdq_s16(a, b);
}

// CHECK-LABEL: @test_vabdq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabds.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VABDQ_V2_I]]
int32x4_t test_vabdq_s32(int32x4_t a, int32x4_t b) {
  return vabdq_s32(a, b);
}

// CHECK-LABEL: @test_vabdq_u8(
// CHECK:   [[VABDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VABDQ_V_I]]
uint8x16_t test_vabdq_u8(uint8x16_t a, uint8x16_t b) {
  return vabdq_u8(a, b);
}

// CHECK-LABEL: @test_vabdq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <8 x i16> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VABDQ_V2_I]]
uint16x8_t test_vabdq_u16(uint16x8_t a, uint16x8_t b) {
  return vabdq_u16(a, b);
}

// CHECK-LABEL: @test_vabdq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <4 x i32> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VABDQ_V2_I]]
uint32x4_t test_vabdq_u32(uint32x4_t a, uint32x4_t b) {
  return vabdq_u32(a, b);
}

// CHECK-LABEL: @test_vabdq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VABDQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VABDQ_V3_I:%.*]] = bitcast <4 x float> [[VABDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VABDQ_V2_I]]
float32x4_t test_vabdq_f32(float32x4_t a, float32x4_t b) {
  return vabdq_f32(a, b);
}

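// vabdl_* is the vabd intrinsic followed by a zero extend to the doubled
// element width.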
// CHECK-LABEL: @test_vabdl_s8(
// CHECK:   [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
int16x8_t test_vabdl_s8(int8x8_t a, int8x8_t b) {
  return vabdl_s8(a, b);
}

// CHECK-LABEL: @test_vabdl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
int32x4_t test_vabdl_s16(int16x4_t a, int16x4_t b) {
  return vabdl_s16(a, b);
}

// CHECK-LABEL: @test_vabdl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
int64x2_t test_vabdl_s32(int32x2_t a, int32x2_t b) {
  return vabdl_s32(a, b);
}

// CHECK-LABEL: @test_vabdl_u8(
// CHECK:   [[VABD_V_I_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> [[VABD_V_I_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I_I]]
uint16x8_t test_vabdl_u8(uint8x8_t a, uint8x8_t b) {
  return vabdl_u8(a, b);
}

// CHECK-LABEL: @test_vabdl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> [[VABD_V2_I_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I_I]]
uint32x4_t test_vabdl_u16(uint16x4_t a, uint16x4_t b) {
  return vabdl_u16(a, b);
}

// CHECK-LABEL: @test_vabdl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VABD_V2_I_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VABD_V3_I_I:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VABD_V2_I_I]] to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> [[VABD_V2_I_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I_I]]
uint64x2_t test_vabdl_u32(uint32x2_t a, uint32x2_t b) {
  return vabdl_u32(a, b);
}

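// Integer vabs_* uses llvm.arm.neon.vabs; the float variants lower to the
// generic llvm.fabs intrinsic.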
// CHECK-LABEL: @test_vabs_s8(
// CHECK:   [[VABS_I:%.*]] = call <8 x i8> @llvm.arm.neon.vabs.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VABS_I]]
int8x8_t test_vabs_s8(int8x8_t a) {
  return vabs_s8(a);
}

// CHECK-LABEL: @test_vabs_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16> %a)
// CHECK:   ret <4 x i16> [[VABS1_I]]
int16x4_t test_vabs_s16(int16x4_t a) {
  return vabs_s16(a);
}

// CHECK-LABEL: @test_vabs_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32> %a)
// CHECK:   ret <2 x i32> [[VABS1_I]]
int32x2_t test_vabs_s32(int32x2_t a) {
  return vabs_s32(a);
}

// CHECK-LABEL: @test_vabs_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <2 x float> @llvm.fabs.v2f32(<2 x float> %a)
// CHECK:   ret <2 x float> [[VABS1_I]]
float32x2_t test_vabs_f32(float32x2_t a) {
  return vabs_f32(a);
}

// CHECK-LABEL: @test_vabsq_s8(
// CHECK:   [[VABS_I:%.*]] = call <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VABS_I]]
int8x16_t test_vabsq_s8(int8x16_t a) {
  return vabsq_s8(a);
}

// CHECK-LABEL: @test_vabsq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16> %a)
// CHECK:   ret <8 x i16> [[VABS1_I]]
int16x8_t test_vabsq_s16(int16x8_t a) {
  return vabsq_s16(a);
}

// CHECK-LABEL: @test_vabsq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32> %a)
// CHECK:   ret <4 x i32> [[VABS1_I]]
int32x4_t test_vabsq_s32(int32x4_t a) {
  return vabsq_s32(a);
}

// CHECK-LABEL: @test_vabsq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VABS1_I:%.*]] = call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
// CHECK:   ret <4 x float> [[VABS1_I]]
float32x4_t test_vabsq_f32(float32x4_t a) {
  return vabsq_f32(a);
}

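// vadd_*/vaddq_* lower to plain IR add (fadd for the float variants); no
// target intrinsic is needed.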
// CHECK-LABEL: @test_vadd_s8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vadd_s8(int8x8_t a, int8x8_t b) {
  return vadd_s8(a, b);
}

// CHECK-LABEL: @test_vadd_s16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vadd_s16(int16x4_t a, int16x4_t b) {
  return vadd_s16(a, b);
}

// CHECK-LABEL: @test_vadd_s32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vadd_s32(int32x2_t a, int32x2_t b) {
  return vadd_s32(a, b);
}

// CHECK-LABEL: @test_vadd_s64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vadd_s64(int64x1_t a, int64x1_t b) {
  return vadd_s64(a, b);
}

// CHECK-LABEL: @test_vadd_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %a, %b
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vadd_f32(float32x2_t a, float32x2_t b) {
  return vadd_f32(a, b);
}

// CHECK-LABEL: @test_vadd_u8(
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vadd_u8(uint8x8_t a, uint8x8_t b) {
  return vadd_u8(a, b);
}

// CHECK-LABEL: @test_vadd_u16(
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vadd_u16(uint16x4_t a, uint16x4_t b) {
  return vadd_u16(a, b);
}

// CHECK-LABEL: @test_vadd_u32(
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vadd_u32(uint32x2_t a, uint32x2_t b) {
  return vadd_u32(a, b);
}

// CHECK-LABEL: @test_vadd_u64(
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vadd_u64(uint64x1_t a, uint64x1_t b) {
  return vadd_u64(a, b);
}

// CHECK-LABEL: @test_vaddq_s8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vaddq_s8(int8x16_t a, int8x16_t b) {
  return vaddq_s8(a, b);
}

// CHECK-LABEL: @test_vaddq_s16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddq_s16(int16x8_t a, int16x8_t b) {
  return vaddq_s16(a, b);
}

// CHECK-LABEL: @test_vaddq_s32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddq_s32(int32x4_t a, int32x4_t b) {
  return vaddq_s32(a, b);
}

// CHECK-LABEL: @test_vaddq_s64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddq_s64(int64x2_t a, int64x2_t b) {
  return vaddq_s64(a, b);
}

// CHECK-LABEL: @test_vaddq_f32(
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %a, %b
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vaddq_f32(float32x4_t a, float32x4_t b) {
  return vaddq_f32(a, b);
}

// CHECK-LABEL: @test_vaddq_u8(
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vaddq_u8(a, b);
}

// CHECK-LABEL: @test_vaddq_u16(
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vaddq_u16(a, b);
}

// CHECK-LABEL: @test_vaddq_u32(
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vaddq_u32(a, b);
}

// CHECK-LABEL: @test_vaddq_u64(
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vaddq_u64(a, b);
}

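// vaddhn_* is expanded inline: a full-width add, a logical shift right by
// half the element width, then a truncate to the narrow type.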
// CHECK-LABEL: @test_vaddhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
int8x8_t test_vaddhn_s16(int16x8_t a, int16x8_t b) {
  return vaddhn_s16(a, b);
}

// CHECK-LABEL: @test_vaddhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
int16x4_t test_vaddhn_s32(int32x4_t a, int32x4_t b) {
  return vaddhn_s32(a, b);
}

// CHECK-LABEL: @test_vaddhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
int32x2_t test_vaddhn_s64(int64x2_t a, int64x2_t b) {
  return vaddhn_s64(a, b);
}

// CHECK-LABEL: @test_vaddhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <8 x i16> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <8 x i16> [[VADDHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <8 x i16> [[VADDHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VADDHN2_I]]
uint8x8_t test_vaddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vaddhn_u16(a, b);
}

// CHECK-LABEL: @test_vaddhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <4 x i32> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <4 x i32> [[VADDHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <4 x i32> [[VADDHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VADDHN2_I]]
uint16x4_t test_vaddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vaddhn_u32(a, b);
}

// CHECK-LABEL: @test_vaddhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VADDHN_I:%.*]] = add <2 x i64> %a, %b
// CHECK:   [[VADDHN1_I:%.*]] = lshr <2 x i64> [[VADDHN_I]], <i64 32, i64 32>
// CHECK:   [[VADDHN2_I:%.*]] = trunc <2 x i64> [[VADDHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VADDHN2_I]]
uint32x2_t test_vaddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vaddhn_u64(a, b);
}

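// vaddl_* widens both operands (sext for signed, zext for unsigned) to the
// doubled element width before the add.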
// CHECK-LABEL: @test_vaddl_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddl_s8(int8x8_t a, int8x8_t b) {
  return vaddl_s8(a, b);
}

// CHECK-LABEL: @test_vaddl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddl_s16(int16x4_t a, int16x4_t b) {
  return vaddl_s16(a, b);
}

// CHECK-LABEL: @test_vaddl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddl_s32(int32x2_t a, int32x2_t b) {
  return vaddl_s32(a, b);
}

// CHECK-LABEL: @test_vaddl_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddl_u8(uint8x8_t a, uint8x8_t b) {
  return vaddl_u8(a, b);
}

// CHECK-LABEL: @test_vaddl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddl_u16(uint16x4_t a, uint16x4_t b) {
  return vaddl_u16(a, b);
}

// CHECK-LABEL: @test_vaddl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddl_u32(uint32x2_t a, uint32x2_t b) {
  return vaddl_u32(a, b);
}

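// vaddw_* widens only the second operand; the first is already at the wide
// element width.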
// CHECK-LABEL: @test_vaddw_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vaddw_s8(int16x8_t a, int8x8_t b) {
  return vaddw_s8(a, b);
}

// CHECK-LABEL: @test_vaddw_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vaddw_s16(int32x4_t a, int16x4_t b) {
  return vaddw_s16(a, b);
}

// CHECK-LABEL: @test_vaddw_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vaddw_s32(int64x2_t a, int32x2_t b) {
  return vaddw_s32(a, b);
}

// CHECK-LABEL: @test_vaddw_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vaddw_u8(uint16x8_t a, uint8x8_t b) {
  return vaddw_u8(a, b);
}

// CHECK-LABEL: @test_vaddw_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vaddw_u16(uint32x4_t a, uint16x4_t b) {
  return vaddw_u16(a, b);
}

// CHECK-LABEL: @test_vaddw_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vaddw_u32(uint64x2_t a, uint32x2_t b) {
  return vaddw_u32(a, b);
}

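// vand_*/vandq_* lower to a plain IR and.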
// CHECK-LABEL: @test_vand_s8(
// CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[AND_I]]
int8x8_t test_vand_s8(int8x8_t a, int8x8_t b) {
  return vand_s8(a, b);
}

// CHECK-LABEL: @test_vand_s16(
// CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[AND_I]]
int16x4_t test_vand_s16(int16x4_t a, int16x4_t b) {
  return vand_s16(a, b);
}

// CHECK-LABEL: @test_vand_s32(
// CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[AND_I]]
int32x2_t test_vand_s32(int32x2_t a, int32x2_t b) {
  return vand_s32(a, b);
}

// CHECK-LABEL: @test_vand_s64(
// CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[AND_I]]
int64x1_t test_vand_s64(int64x1_t a, int64x1_t b) {
  return vand_s64(a, b);
}

// CHECK-LABEL: @test_vand_u8(
// CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[AND_I]]
uint8x8_t test_vand_u8(uint8x8_t a, uint8x8_t b) {
  return vand_u8(a, b);
}

// CHECK-LABEL: @test_vand_u16(
// CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[AND_I]]
uint16x4_t test_vand_u16(uint16x4_t a, uint16x4_t b) {
  return vand_u16(a, b);
}

// CHECK-LABEL: @test_vand_u32(
// CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[AND_I]]
uint32x2_t test_vand_u32(uint32x2_t a, uint32x2_t b) {
  return vand_u32(a, b);
}

// CHECK-LABEL: @test_vand_u64(
// CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[AND_I]]
uint64x1_t test_vand_u64(uint64x1_t a, uint64x1_t b) {
  return vand_u64(a, b);
}

// CHECK-LABEL: @test_vandq_s8(
// CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[AND_I]]
int8x16_t test_vandq_s8(int8x16_t a, int8x16_t b) {
  return vandq_s8(a, b);
}

// CHECK-LABEL: @test_vandq_s16(
// CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[AND_I]]
int16x8_t test_vandq_s16(int16x8_t a, int16x8_t b) {
  return vandq_s16(a, b);
}

// CHECK-LABEL: @test_vandq_s32(
// CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[AND_I]]
int32x4_t test_vandq_s32(int32x4_t a, int32x4_t b) {
  return vandq_s32(a, b);
}

// CHECK-LABEL: @test_vandq_s64(
// CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[AND_I]]
int64x2_t test_vandq_s64(int64x2_t a, int64x2_t b) {
  return vandq_s64(a, b);
}

// CHECK-LABEL: @test_vandq_u8(
// CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[AND_I]]
uint8x16_t test_vandq_u8(uint8x16_t a, uint8x16_t b) {
  return vandq_u8(a, b);
}

// CHECK-LABEL: @test_vandq_u16(
// CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[AND_I]]
uint16x8_t test_vandq_u16(uint16x8_t a, uint16x8_t b) {
  return vandq_u16(a, b);
}

// CHECK-LABEL: @test_vandq_u32(
// CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[AND_I]]
uint32x4_t test_vandq_u32(uint32x4_t a, uint32x4_t b) {
  return vandq_u32(a, b);
}

// CHECK-LABEL: @test_vandq_u64(
// CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[AND_I]]
uint64x2_t test_vandq_u64(uint64x2_t a, uint64x2_t b) {
  return vandq_u64(a, b);
}

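// vbic_* (bit clear) expands to an AND with the complement of the second
// operand: an xor with all-ones followed by an and.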
// CHECK-LABEL: @test_vbic_s8(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK:   ret <8 x i8> [[AND_I]]
int8x8_t test_vbic_s8(int8x8_t a, int8x8_t b) {
  return vbic_s8(a, b);
}

// CHECK-LABEL: @test_vbic_s16(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK:   ret <4 x i16> [[AND_I]]
int16x4_t test_vbic_s16(int16x4_t a, int16x4_t b) {
  return vbic_s16(a, b);
}

// CHECK-LABEL: @test_vbic_s32(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK:   ret <2 x i32> [[AND_I]]
int32x2_t test_vbic_s32(int32x2_t a, int32x2_t b) {
  return vbic_s32(a, b);
}

// CHECK-LABEL: @test_vbic_s64(
// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK:   ret <1 x i64> [[AND_I]]
int64x1_t test_vbic_s64(int64x1_t a, int64x1_t b) {
  return vbic_s64(a, b);
}

// CHECK-LABEL: @test_vbic_u8(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[AND_I:%.*]] = and <8 x i8> %a, [[NEG_I]]
// CHECK:   ret <8 x i8> [[AND_I]]
uint8x8_t test_vbic_u8(uint8x8_t a, uint8x8_t b) {
  return vbic_u8(a, b);
}

// CHECK-LABEL: @test_vbic_u16(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[AND_I:%.*]] = and <4 x i16> %a, [[NEG_I]]
// CHECK:   ret <4 x i16> [[AND_I]]
uint16x4_t test_vbic_u16(uint16x4_t a, uint16x4_t b) {
  return vbic_u16(a, b);
}

// CHECK-LABEL: @test_vbic_u32(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK:   [[AND_I:%.*]] = and <2 x i32> %a, [[NEG_I]]
// CHECK:   ret <2 x i32> [[AND_I]]
uint32x2_t test_vbic_u32(uint32x2_t a, uint32x2_t b) {
  return vbic_u32(a, b);
}

// CHECK-LABEL: @test_vbic_u64(
// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK:   [[AND_I:%.*]] = and <1 x i64> %a, [[NEG_I]]
// CHECK:   ret <1 x i64> [[AND_I]]
uint64x1_t test_vbic_u64(uint64x1_t a, uint64x1_t b) {
  return vbic_u64(a, b);
}

// CHECK-LABEL: @test_vbicq_s8(
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK:   ret <16 x i8> [[AND_I]]
int8x16_t test_vbicq_s8(int8x16_t a, int8x16_t b) {
  return vbicq_s8(a, b);
}

// CHECK-LABEL: @test_vbicq_s16(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK:   ret <8 x i16> [[AND_I]]
int16x8_t test_vbicq_s16(int16x8_t a, int16x8_t b) {
  return vbicq_s16(a, b);
}

// CHECK-LABEL: @test_vbicq_s32(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK:   ret <4 x i32> [[AND_I]]
int32x4_t test_vbicq_s32(int32x4_t a, int32x4_t b) {
  return vbicq_s32(a, b);
}

// CHECK-LABEL: @test_vbicq_s64(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK:   ret <2 x i64> [[AND_I]]
int64x2_t test_vbicq_s64(int64x2_t a, int64x2_t b) {
  return vbicq_s64(a, b);
}

// CHECK-LABEL: @test_vbicq_u8(
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[AND_I:%.*]] = and <16 x i8> %a, [[NEG_I]]
// CHECK:   ret <16 x i8> [[AND_I]]
uint8x16_t test_vbicq_u8(uint8x16_t a, uint8x16_t b) {
  return vbicq_u8(a, b);
}

// CHECK-LABEL: @test_vbicq_u16(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[AND_I:%.*]] = and <8 x i16> %a, [[NEG_I]]
// CHECK:   ret <8 x i16> [[AND_I]]
uint16x8_t test_vbicq_u16(uint16x8_t a, uint16x8_t b) {
  return vbicq_u16(a, b);
}

// CHECK-LABEL: @test_vbicq_u32(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[AND_I:%.*]] = and <4 x i32> %a, [[NEG_I]]
// CHECK:   ret <4 x i32> [[AND_I]]
uint32x4_t test_vbicq_u32(uint32x4_t a, uint32x4_t b) {
  return vbicq_u32(a, b);
}

// CHECK-LABEL: @test_vbicq_u64(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK:   [[AND_I:%.*]] = and <2 x i64> %a, [[NEG_I]]
// CHECK:   ret <2 x i64> [[AND_I]]
uint64x2_t test_vbicq_u64(uint64x2_t a, uint64x2_t b) {
  return vbicq_u64(a, b);
}

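// vbsl_* (bitwise select, mask in the first operand) is lowered to
// llvm.arm.neon.vbsl on the byte-vector type; other element types are
// bitcast in and out around the call.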
// CHECK-LABEL: @test_vbsl_s8(
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VBSL_V_I]]
int8x8_t test_vbsl_s8(uint8x8_t a, int8x8_t b, int8x8_t c) {
  return vbsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP3]]
int16x4_t test_vbsl_s16(uint16x4_t a, int16x4_t b, int16x4_t c) {
  return vbsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP3]]
int32x2_t test_vbsl_s32(uint32x2_t a, int32x2_t b, int32x2_t c) {
  return vbsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP3]]
int64x1_t test_vbsl_s64(uint64x1_t a, int64x1_t b, int64x1_t c) {
  return vbsl_s64(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u8(
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VBSL_V_I]]
uint8x8_t test_vbsl_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vbsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP3]]
uint16x4_t test_vbsl_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vbsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[TMP3]]
uint32x2_t test_vbsl_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vbsl_u32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <1 x i64> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <1 x i64>
// CHECK:   ret <1 x i64> [[TMP3]]
uint64x1_t test_vbsl_u64(uint64x1_t a, uint64x1_t b, uint64x1_t c) {
  return vbsl_u64(a, b, c);
}

// CHECK-LABEL: @test_vbsl_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <2 x float>
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vbsl_f32(uint32x2_t a, float32x2_t b, float32x2_t c) {
  return vbsl_f32(a, b, c);
}

// CHECK-LABEL: @test_vbsl_p8(
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
// CHECK:   ret <8 x i8> [[VBSL_V_I]]
poly8x8_t test_vbsl_p8(uint8x8_t a, poly8x8_t b, poly8x8_t c) {
  return vbsl_p8(a, b, c);
}

// CHECK-LABEL: @test_vbsl_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP3]]
poly16x4_t test_vbsl_p16(uint16x4_t a, poly16x4_t b, poly16x4_t c) {
  return vbsl_p16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s8(
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK:   ret <16 x i8> [[VBSLQ_V_I]]
int8x16_t test_vbslq_s8(uint8x16_t a, int8x16_t b, int8x16_t c) {
  return vbslq_s8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP3]]
int16x8_t test_vbslq_s16(uint16x8_t a, int16x8_t b, int16x8_t c) {
  return vbslq_s16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP3]]
int32x4_t test_vbslq_s32(uint32x4_t a, int32x4_t b, int32x4_t c) {
  return vbslq_s32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP3]]
int64x2_t test_vbslq_s64(uint64x2_t a, int64x2_t b, int64x2_t c) {
  return vbslq_s64(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u8(
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK:   ret <16 x i8> [[VBSLQ_V_I]]
uint8x16_t test_vbslq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vbslq_u8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP3]]
uint16x8_t test_vbslq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vbslq_u16(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP3]]
uint32x4_t test_vbslq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vbslq_u32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP3]]
uint64x2_t test_vbslq_u64(uint64x2_t a, uint64x2_t b, uint64x2_t c) {
  return vbslq_u64(a, b, c);
}

// CHECK-LABEL: @test_vbslq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <4 x float>
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vbslq_f32(uint32x4_t a, float32x4_t b, float32x4_t c) {
  return vbslq_f32(a, b, c);
}

// CHECK-LABEL: @test_vbslq_p8(
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c)
// CHECK:   ret <16 x i8> [[VBSLQ_V_I]]
poly8x16_t test_vbslq_p8(uint8x16_t a, poly8x16_t b, poly8x16_t c) {
  return vbslq_p8(a, b, c);
}

// CHECK-LABEL: @test_vbslq_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %c to <16 x i8>
// CHECK:   [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP3]]
poly16x8_t test_vbslq_p16(uint16x8_t a, poly16x8_t b, poly16x8_t c) {
1215   return vbslq_p16(a, b, c);
1216 }
1217 
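
// The vbsl tests above exercise the NEON bitwise-select operation: each
// result bit is taken from the second operand where the corresponding bit of
// the mask (first operand) is set, and from the third operand otherwise,
// i.e. (a & b) | (~a & c). A scalar model of that per-bit rule, added here
// only as an illustrative sketch (it has no CHECK lines and is not part of
// the checked output):
static uint8_t bsl_model(uint8_t a, uint8_t b, uint8_t c) {
  return (uint8_t)((a & b) | (~a & c));
}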

// CHECK-LABEL: @test_vcage_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VCAGE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x i32> [[VCAGE_V2_I]]
uint32x2_t test_vcage_f32(float32x2_t a, float32x2_t b) {
  return vcage_f32(a, b);
}

// CHECK-LABEL: @test_vcageq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VCAGEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x i32> [[VCAGEQ_V2_I]]
uint32x4_t test_vcageq_f32(float32x4_t a, float32x4_t b) {
  return vcageq_f32(a, b);
}

// CHECK-LABEL: @test_vcagt_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VCAGT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   ret <2 x i32> [[VCAGT_V2_I]]
uint32x2_t test_vcagt_f32(float32x2_t a, float32x2_t b) {
  return vcagt_f32(a, b);
}

// CHECK-LABEL: @test_vcagtq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VCAGTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   ret <4 x i32> [[VCAGTQ_V2_I]]
uint32x4_t test_vcagtq_f32(float32x4_t a, float32x4_t b) {
  return vcagtq_f32(a, b);
}

// CHECK-LABEL: @test_vcale_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VCALE_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacge.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
// CHECK:   ret <2 x i32> [[VCALE_V2_I]]
uint32x2_t test_vcale_f32(float32x2_t a, float32x2_t b) {
  return vcale_f32(a, b);
}

// CHECK-LABEL: @test_vcaleq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VCALEQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacge.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
// CHECK:   ret <4 x i32> [[VCALEQ_V2_I]]
uint32x4_t test_vcaleq_f32(float32x4_t a, float32x4_t b) {
  return vcaleq_f32(a, b);
}

// CHECK-LABEL: @test_vcalt_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VCALT_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vacgt.v2i32.v2f32(<2 x float> %b, <2 x float> %a)
// CHECK:   ret <2 x i32> [[VCALT_V2_I]]
uint32x2_t test_vcalt_f32(float32x2_t a, float32x2_t b) {
  return vcalt_f32(a, b);
}

// CHECK-LABEL: @test_vcaltq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VCALTQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vacgt.v4i32.v4f32(<4 x float> %b, <4 x float> %a)
// CHECK:   ret <4 x i32> [[VCALTQ_V2_I]]
uint32x4_t test_vcaltq_f32(float32x4_t a, float32x4_t b) {
  return vcaltq_f32(a, b);
}
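
// The vcage/vcagt ("absolute compare") intrinsics compare absolute values,
// |a| >= |b| and |a| > |b|. As the CHECK lines above show, the less-or-equal
// and less-than forms have no intrinsic of their own: vcale/vcalt are
// emitted as vacge/vacgt with the operands swapped, since |a| <= |b| is the
// same predicate as |b| >= |a|.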

// CHECK-LABEL: @test_vceq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_s8(int8x8_t a, int8x8_t b) {
  return vceq_s8(a, b);
}

// CHECK-LABEL: @test_vceq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_s16(int16x4_t a, int16x4_t b) {
  return vceq_s16(a, b);
}

// CHECK-LABEL: @test_vceq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_s32(int32x2_t a, int32x2_t b) {
  return vceq_s32(a, b);
}

// CHECK-LABEL: @test_vceq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_f32(float32x2_t a, float32x2_t b) {
  return vceq_f32(a, b);
}

// CHECK-LABEL: @test_vceq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_u8(uint8x8_t a, uint8x8_t b) {
  return vceq_u8(a, b);
}

// CHECK-LABEL: @test_vceq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vceq_u16(uint16x4_t a, uint16x4_t b) {
  return vceq_u16(a, b);
}

// CHECK-LABEL: @test_vceq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vceq_u32(uint32x2_t a, uint32x2_t b) {
  return vceq_u32(a, b);
}

// CHECK-LABEL: @test_vceq_p8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vceq_p8(poly8x8_t a, poly8x8_t b) {
  return vceq_p8(a, b);
}

// CHECK-LABEL: @test_vceqq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_s8(int8x16_t a, int8x16_t b) {
  return vceqq_s8(a, b);
}

// CHECK-LABEL: @test_vceqq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_s16(int16x8_t a, int16x8_t b) {
  return vceqq_s16(a, b);
}

// CHECK-LABEL: @test_vceqq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_s32(int32x4_t a, int32x4_t b) {
  return vceqq_s32(a, b);
}

// CHECK-LABEL: @test_vceqq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oeq <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_f32(float32x4_t a, float32x4_t b) {
  return vceqq_f32(a, b);
}

// CHECK-LABEL: @test_vceqq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_u8(uint8x16_t a, uint8x16_t b) {
  return vceqq_u8(a, b);
}

// CHECK-LABEL: @test_vceqq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp eq <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vceqq_u16(uint16x8_t a, uint16x8_t b) {
  return vceqq_u16(a, b);
}

// CHECK-LABEL: @test_vceqq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp eq <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vceqq_u32(uint32x4_t a, uint32x4_t b) {
  return vceqq_u32(a, b);
}

// CHECK-LABEL: @test_vceqq_p8(
// CHECK:   [[CMP_I:%.*]] = icmp eq <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vceqq_p8(poly8x16_t a, poly8x16_t b) {
  return vceqq_p8(a, b);
}
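
// The vceq tests show that NEON equality comparisons need no target-specific
// intrinsic: they lower to a plain icmp (fcmp oeq for floats) followed by a
// sign extension of the <N x i1> result, which produces the all-ones /
// all-zeros lane masks that NEON comparisons return.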

// CHECK-LABEL: @test_vcge_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_s8(int8x8_t a, int8x8_t b) {
  return vcge_s8(a, b);
}

// CHECK-LABEL: @test_vcge_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_s16(int16x4_t a, int16x4_t b) {
  return vcge_s16(a, b);
}

// CHECK-LABEL: @test_vcge_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sge <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_s32(int32x2_t a, int32x2_t b) {
  return vcge_s32(a, b);
}

// CHECK-LABEL: @test_vcge_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oge <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_f32(float32x2_t a, float32x2_t b) {
  return vcge_f32(a, b);
}

// CHECK-LABEL: @test_vcge_u8(
// CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcge_u8(uint8x8_t a, uint8x8_t b) {
  return vcge_u8(a, b);
}

// CHECK-LABEL: @test_vcge_u16(
// CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcge_u16(uint16x4_t a, uint16x4_t b) {
  return vcge_u16(a, b);
}

// CHECK-LABEL: @test_vcge_u32(
// CHECK:   [[CMP_I:%.*]] = icmp uge <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcge_u32(uint32x2_t a, uint32x2_t b) {
  return vcge_u32(a, b);
}

// CHECK-LABEL: @test_vcgeq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sge <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_s8(int8x16_t a, int8x16_t b) {
  return vcgeq_s8(a, b);
}

// CHECK-LABEL: @test_vcgeq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sge <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_s16(int16x8_t a, int16x8_t b) {
  return vcgeq_s16(a, b);
}

// CHECK-LABEL: @test_vcgeq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sge <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_s32(int32x4_t a, int32x4_t b) {
  return vcgeq_s32(a, b);
}

// CHECK-LABEL: @test_vcgeq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp oge <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_f32(float32x4_t a, float32x4_t b) {
  return vcgeq_f32(a, b);
}

// CHECK-LABEL: @test_vcgeq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp uge <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgeq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgeq_u8(a, b);
}

// CHECK-LABEL: @test_vcgeq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp uge <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgeq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgeq_u16(a, b);
}

// CHECK-LABEL: @test_vcgeq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp uge <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgeq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgeq_u32(a, b);
}

// CHECK-LABEL: @test_vcgt_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_s8(int8x8_t a, int8x8_t b) {
  return vcgt_s8(a, b);
}

// CHECK-LABEL: @test_vcgt_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_s16(int16x4_t a, int16x4_t b) {
  return vcgt_s16(a, b);
}

// CHECK-LABEL: @test_vcgt_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_s32(int32x2_t a, int32x2_t b) {
  return vcgt_s32(a, b);
}

// CHECK-LABEL: @test_vcgt_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_f32(float32x2_t a, float32x2_t b) {
  return vcgt_f32(a, b);
}

// CHECK-LABEL: @test_vcgt_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcgt_u8(uint8x8_t a, uint8x8_t b) {
  return vcgt_u8(a, b);
}

// CHECK-LABEL: @test_vcgt_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcgt_u16(uint16x4_t a, uint16x4_t b) {
  return vcgt_u16(a, b);
}

// CHECK-LABEL: @test_vcgt_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcgt_u32(uint32x2_t a, uint32x2_t b) {
  return vcgt_u32(a, b);
}

// CHECK-LABEL: @test_vcgtq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_s8(int8x16_t a, int8x16_t b) {
  return vcgtq_s8(a, b);
}

// CHECK-LABEL: @test_vcgtq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_s16(int16x8_t a, int16x8_t b) {
  return vcgtq_s16(a, b);
}

// CHECK-LABEL: @test_vcgtq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sgt <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_s32(int32x4_t a, int32x4_t b) {
  return vcgtq_s32(a, b);
}

// CHECK-LABEL: @test_vcgtq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ogt <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_f32(float32x4_t a, float32x4_t b) {
  return vcgtq_f32(a, b);
}

// CHECK-LABEL: @test_vcgtq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcgtq_u8(uint8x16_t a, uint8x16_t b) {
  return vcgtq_u8(a, b);
}

// CHECK-LABEL: @test_vcgtq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcgtq_u16(uint16x8_t a, uint16x8_t b) {
  return vcgtq_u16(a, b);
}

// CHECK-LABEL: @test_vcgtq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ugt <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcgtq_u32(uint32x4_t a, uint32x4_t b) {
  return vcgtq_u32(a, b);
}

// CHECK-LABEL: @test_vcle_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_s8(int8x8_t a, int8x8_t b) {
  return vcle_s8(a, b);
}

// CHECK-LABEL: @test_vcle_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_s16(int16x4_t a, int16x4_t b) {
  return vcle_s16(a, b);
}

// CHECK-LABEL: @test_vcle_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sle <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_s32(int32x2_t a, int32x2_t b) {
  return vcle_s32(a, b);
}

// CHECK-LABEL: @test_vcle_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_f32(float32x2_t a, float32x2_t b) {
  return vcle_f32(a, b);
}

// CHECK-LABEL: @test_vcle_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vcle_u8(uint8x8_t a, uint8x8_t b) {
  return vcle_u8(a, b);
}

// CHECK-LABEL: @test_vcle_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vcle_u16(uint16x4_t a, uint16x4_t b) {
  return vcle_u16(a, b);
}

// CHECK-LABEL: @test_vcle_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ule <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vcle_u32(uint32x2_t a, uint32x2_t b) {
  return vcle_u32(a, b);
}

// CHECK-LABEL: @test_vcleq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp sle <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_s8(int8x16_t a, int8x16_t b) {
  return vcleq_s8(a, b);
}

// CHECK-LABEL: @test_vcleq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp sle <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_s16(int16x8_t a, int16x8_t b) {
  return vcleq_s16(a, b);
}

// CHECK-LABEL: @test_vcleq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp sle <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_s32(int32x4_t a, int32x4_t b) {
  return vcleq_s32(a, b);
}

// CHECK-LABEL: @test_vcleq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp ole <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_f32(float32x4_t a, float32x4_t b) {
  return vcleq_f32(a, b);
}

// CHECK-LABEL: @test_vcleq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ule <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcleq_u8(uint8x16_t a, uint8x16_t b) {
  return vcleq_u8(a, b);
}

// CHECK-LABEL: @test_vcleq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ule <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcleq_u16(uint16x8_t a, uint16x8_t b) {
  return vcleq_u16(a, b);
}

// CHECK-LABEL: @test_vcleq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ule <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcleq_u32(uint32x4_t a, uint32x4_t b) {
  return vcleq_u32(a, b);
}
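
// As with vceq, the ordering comparisons above are expressed in generic IR:
// signed element types use the signed predicates (sge/sgt/sle), unsigned
// types the unsigned ones (uge/ugt/ule), and floats the ordered predicates
// (oge/ogt/ole), each followed by the same sext-to-lane-mask idiom.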

// CHECK-LABEL: @test_vcls_s8(
// CHECK:   [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_s8(int8x8_t a) {
  return vcls_s8(a);
}

// CHECK-LABEL: @test_vcls_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
// CHECK:   [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLS_V1_I]]
int16x4_t test_vcls_s16(int16x4_t a) {
  return vcls_s16(a);
}

// CHECK-LABEL: @test_vcls_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
// CHECK:   [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLS_V1_I]]
int32x2_t test_vcls_s32(int32x2_t a) {
  return vcls_s32(a);
}

// CHECK-LABEL: @test_vcls_u8(
// CHECK:   [[VCLS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vcls.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VCLS_V_I]]
int8x8_t test_vcls_u8(uint8x8_t a) {
  return vcls_u8(a);
}

// CHECK-LABEL: @test_vcls_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcls.v4i16(<4 x i16> %a)
// CHECK:   [[VCLS_V2_I:%.*]] = bitcast <4 x i16> [[VCLS_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLS_V1_I]]
int16x4_t test_vcls_u16(uint16x4_t a) {
  return vcls_u16(a);
}

// CHECK-LABEL: @test_vcls_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vcls.v2i32(<2 x i32> %a)
// CHECK:   [[VCLS_V2_I:%.*]] = bitcast <2 x i32> [[VCLS_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLS_V1_I]]
int32x2_t test_vcls_u32(uint32x2_t a) {
  return vcls_u32(a);
}

// CHECK-LABEL: @test_vclsq_s8(
// CHECK:   [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_s8(int8x16_t a) {
  return vclsq_s8(a);
}

// CHECK-LABEL: @test_vclsq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
// CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VCLSQ_V1_I]]
int16x8_t test_vclsq_s16(int16x8_t a) {
  return vclsq_s16(a);
}

// CHECK-LABEL: @test_vclsq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
// CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VCLSQ_V1_I]]
int32x4_t test_vclsq_s32(int32x4_t a) {
  return vclsq_s32(a);
}

// CHECK-LABEL: @test_vclsq_u8(
// CHECK:   [[VCLSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vcls.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VCLSQ_V_I]]
int8x16_t test_vclsq_u8(uint8x16_t a) {
  return vclsq_u8(a);
}

// CHECK-LABEL: @test_vclsq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vcls.v8i16(<8 x i16> %a)
// CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VCLSQ_V1_I]]
int16x8_t test_vclsq_u16(uint16x8_t a) {
  return vclsq_u16(a);
}

// CHECK-LABEL: @test_vclsq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vcls.v4i32(<4 x i32> %a)
// CHECK:   [[VCLSQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLSQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VCLSQ_V1_I]]
int32x4_t test_vclsq_u32(uint32x4_t a) {
  return vclsq_u32(a);
}
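
// vcls counts leading sign bits: the number of bits directly below the sign
// bit that are equal to it. The unsigned variants simply reinterpret their
// input and reuse the signed intrinsic, which is why vcls_u8 above also
// calls @llvm.arm.neon.vcls.v8i8. A scalar model of the per-lane operation,
// included only as an illustrative sketch (no CHECK lines):
static int cls8_model(int8_t x) {
  unsigned u = (uint8_t)x, sign = u >> 7;
  int n = 0;
  // Walk down from bit 6 while each bit still matches the sign bit, e.g.
  // cls8_model(0) == 7 and cls8_model(-32) == 2 (-32 is 0b11100000).
  for (int i = 6; i >= 0 && ((u >> i) & 1) == sign; --i)
    ++n;
  return n;
}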

// CHECK-LABEL: @test_vclt_s8(
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_s8(int8x8_t a, int8x8_t b) {
  return vclt_s8(a, b);
}

// CHECK-LABEL: @test_vclt_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_s16(int16x4_t a, int16x4_t b) {
  return vclt_s16(a, b);
}

// CHECK-LABEL: @test_vclt_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_s32(int32x2_t a, int32x2_t b) {
  return vclt_s32(a, b);
}

// CHECK-LABEL: @test_vclt_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <2 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_f32(float32x2_t a, float32x2_t b) {
  return vclt_f32(a, b);
}

// CHECK-LABEL: @test_vclt_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[SEXT_I]]
uint8x8_t test_vclt_u8(uint8x8_t a, uint8x8_t b) {
  return vclt_u8(a, b);
}

// CHECK-LABEL: @test_vclt_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[SEXT_I]]
uint16x4_t test_vclt_u16(uint16x4_t a, uint16x4_t b) {
  return vclt_u16(a, b);
}

// CHECK-LABEL: @test_vclt_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <2 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[SEXT_I]]
uint32x2_t test_vclt_u32(uint32x2_t a, uint32x2_t b) {
  return vclt_u32(a, b);
}

// CHECK-LABEL: @test_vcltq_s8(
// CHECK:   [[CMP_I:%.*]] = icmp slt <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_s8(int8x16_t a, int8x16_t b) {
  return vcltq_s8(a, b);
}

// CHECK-LABEL: @test_vcltq_s16(
// CHECK:   [[CMP_I:%.*]] = icmp slt <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_s16(int16x8_t a, int16x8_t b) {
  return vcltq_s16(a, b);
}

// CHECK-LABEL: @test_vcltq_s32(
// CHECK:   [[CMP_I:%.*]] = icmp slt <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_s32(int32x4_t a, int32x4_t b) {
  return vcltq_s32(a, b);
}

// CHECK-LABEL: @test_vcltq_f32(
// CHECK:   [[CMP_I:%.*]] = fcmp olt <4 x float> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_f32(float32x4_t a, float32x4_t b) {
  return vcltq_f32(a, b);
}

// CHECK-LABEL: @test_vcltq_u8(
// CHECK:   [[CMP_I:%.*]] = icmp ult <16 x i8> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <16 x i1> [[CMP_I]] to <16 x i8>
// CHECK:   ret <16 x i8> [[SEXT_I]]
uint8x16_t test_vcltq_u8(uint8x16_t a, uint8x16_t b) {
  return vcltq_u8(a, b);
}

// CHECK-LABEL: @test_vcltq_u16(
// CHECK:   [[CMP_I:%.*]] = icmp ult <8 x i16> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <8 x i1> [[CMP_I]] to <8 x i16>
// CHECK:   ret <8 x i16> [[SEXT_I]]
uint16x8_t test_vcltq_u16(uint16x8_t a, uint16x8_t b) {
  return vcltq_u16(a, b);
}

// CHECK-LABEL: @test_vcltq_u32(
// CHECK:   [[CMP_I:%.*]] = icmp ult <4 x i32> %a, %b
// CHECK:   [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
// CHECK:   ret <4 x i32> [[SEXT_I]]
uint32x4_t test_vcltq_u32(uint32x4_t a, uint32x4_t b) {
  return vcltq_u32(a, b);
}

// CHECK-LABEL: @test_vclz_s8(
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vclz_s8(int8x8_t a) {
  return vclz_s8(a);
}

// CHECK-LABEL: @test_vclz_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vclz_s16(int16x4_t a) {
  return vclz_s16(a);
}

// CHECK-LABEL: @test_vclz_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vclz_s32(int32x2_t a) {
  return vclz_s32(a);
}

// CHECK-LABEL: @test_vclz_u8(
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> %a, i1 false)
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
uint8x8_t test_vclz_u8(uint8x8_t a) {
  return vclz_u8(a);
}

// CHECK-LABEL: @test_vclz_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> %a, i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
uint16x4_t test_vclz_u16(uint16x4_t a) {
  return vclz_u16(a);
}

// CHECK-LABEL: @test_vclz_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %a, i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
uint32x2_t test_vclz_u32(uint32x2_t a) {
  return vclz_u32(a);
}

// CHECK-LABEL: @test_vclzq_s8(
// CHECK:   [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
// CHECK:   ret <16 x i8> [[VCLZQ_V_I]]
int8x16_t test_vclzq_s8(int8x16_t a) {
  return vclzq_s8(a);
}

// CHECK-LABEL: @test_vclzq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VCLZQ_V1_I]]
int16x8_t test_vclzq_s16(int16x8_t a) {
  return vclzq_s16(a);
}

// CHECK-LABEL: @test_vclzq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VCLZQ_V1_I]]
int32x4_t test_vclzq_s32(int32x4_t a) {
  return vclzq_s32(a);
}

// CHECK-LABEL: @test_vclzq_u8(
// CHECK:   [[VCLZQ_V_I:%.*]] = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
// CHECK:   ret <16 x i8> [[VCLZQ_V_I]]
uint8x16_t test_vclzq_u8(uint8x16_t a) {
  return vclzq_u8(a);
}

// CHECK-LABEL: @test_vclzq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <8 x i16> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VCLZQ_V1_I]]
uint16x8_t test_vclzq_u16(uint16x8_t a) {
  return vclzq_u16(a);
}

// CHECK-LABEL: @test_vclzq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCLZQ_V1_I:%.*]] = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
// CHECK:   [[VCLZQ_V2_I:%.*]] = bitcast <4 x i32> [[VCLZQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VCLZQ_V1_I]]
uint32x4_t test_vclzq_u32(uint32x4_t a) {
  return vclzq_u32(a);
}
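
// vclz maps directly onto the generic @llvm.ctlz intrinsic; the trailing
// "i1 false" flag records that a zero input is well defined (the result is
// the element width in bits) rather than being treated as undefined.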

// CHECK-LABEL: @test_vcnt_u8(
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
uint8x8_t test_vcnt_u8(uint8x8_t a) {
  return vcnt_u8(a);
}

// CHECK-LABEL: @test_vcnt_s8(
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
int8x8_t test_vcnt_s8(int8x8_t a) {
  return vcnt_s8(a);
}

// CHECK-LABEL: @test_vcnt_p8(
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcnt_p8(poly8x8_t a) {
  return vcnt_p8(a);
}

// CHECK-LABEL: @test_vcntq_u8(
// CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
uint8x16_t test_vcntq_u8(uint8x16_t a) {
  return vcntq_u8(a);
}

// CHECK-LABEL: @test_vcntq_s8(
// CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
int8x16_t test_vcntq_s8(int8x16_t a) {
  return vcntq_s8(a);
}

// CHECK-LABEL: @test_vcntq_p8(
// CHECK:   [[VCNTQ_V_I:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VCNTQ_V_I]]
poly8x16_t test_vcntq_p8(poly8x16_t a) {
  return vcntq_p8(a);
}
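
// vcnt is a per-byte population count, so all three element flavours
// (s8, u8, p8) lower to the same generic @llvm.ctpop call.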

// CHECK-LABEL: @test_vcombine_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vcombine_s8(int8x8_t a, int8x8_t b) {
  return vcombine_s8(a, b);
}

// CHECK-LABEL: @test_vcombine_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vcombine_s16(int16x4_t a, int16x4_t b) {
  return vcombine_s16(a, b);
}

// CHECK-LABEL: @test_vcombine_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vcombine_s32(int32x2_t a, int32x2_t b) {
  return vcombine_s32(a, b);
}

// CHECK-LABEL: @test_vcombine_s64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i64> [[SHUFFLE_I]]
int64x2_t test_vcombine_s64(int64x1_t a, int64x1_t b) {
  return vcombine_s64(a, b);
}

// CHECK-LABEL: @test_vcombine_f16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x half> [[SHUFFLE_I]]
float16x8_t test_vcombine_f16(float16x4_t a, float16x4_t b) {
  return vcombine_f16(a, b);
}

// CHECK-LABEL: @test_vcombine_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vcombine_f32(float32x2_t a, float32x2_t b) {
  return vcombine_f32(a, b);
}

// CHECK-LABEL: @test_vcombine_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vcombine_u8(uint8x8_t a, uint8x8_t b) {
  return vcombine_u8(a, b);
}

// CHECK-LABEL: @test_vcombine_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vcombine_u16(uint16x4_t a, uint16x4_t b) {
  return vcombine_u16(a, b);
}

// CHECK-LABEL: @test_vcombine_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vcombine_u32(uint32x2_t a, uint32x2_t b) {
  return vcombine_u32(a, b);
}

// CHECK-LABEL: @test_vcombine_u64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %a, <1 x i64> %b, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i64> [[SHUFFLE_I]]
uint64x2_t test_vcombine_u64(uint64x1_t a, uint64x1_t b) {
  return vcombine_u64(a, b);
}

// CHECK-LABEL: @test_vcombine_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vcombine_p8(poly8x8_t a, poly8x8_t b) {
  return vcombine_p8(a, b);
}

// CHECK-LABEL: @test_vcombine_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vcombine_p16(poly16x4_t a, poly16x4_t b) {
  return vcombine_p16(a, b);
}
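
// vcombine concatenates two 64-bit halves into one 128-bit vector. No
// intrinsic is needed: a shufflevector whose index list is the identity
// sequence <0, 1, ..., 2N-1> expresses the concatenation directly in IR.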

// CHECK-LABEL: @test_vcreate_s8(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_s8(uint64_t a) {
  return vclz_s8(vcreate_s8(a));
}

// CHECK-LABEL: @test_vcreate_imm
// CHECK: [[RES:%.*]] = bitcast i64 0 to <4 x i16>
// CHECK: ret <4 x i16> [[RES]]
int16x4_t test_vcreate_imm(void) {
  return vcreate_s16(0);
}

// CHECK-LABEL: @test_vcreate_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_s16(uint64_t a) {
  return vclz_s16(vcreate_s16(a));
}

// CHECK-LABEL: @test_vcreate_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_s32(uint64_t a) {
  return vclz_s32(vcreate_s32(a));
}

// CHECK-LABEL: @test_vcreate_f16(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <4 x half>
// CHECK:   ret <4 x half> [[TMP0]]
float16x4_t test_vcreate_f16(uint64_t a) {
  return vcreate_f16(a);
}

// CHECK-LABEL: @test_vcreate_f32(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <2 x float>
// CHECK:   ret <2 x float> [[TMP0]]
float32x2_t test_vcreate_f32(uint64_t a) {
  return vcreate_f32(a);
}

// CHECK-LABEL: @test_vcreate_u8(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK:   [[VCLZ_V_I:%.*]] = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> [[TMP0]], i1 false)
// CHECK:   ret <8 x i8> [[VCLZ_V_I]]
int8x8_t test_vcreate_u8(uint64_t a) {
  return vclz_s8((int8x8_t)vcreate_u8(a));
}

// CHECK-LABEL: @test_vcreate_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <4 x i16> @llvm.ctlz.v4i16(<4 x i16> [[TMP0]], i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <4 x i16> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VCLZ_V1_I]]
int16x4_t test_vcreate_u16(uint64_t a) {
  return vclz_s16((int16x4_t)vcreate_u16(a));
}

// CHECK-LABEL: @test_vcreate_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <2 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
// CHECK:   [[VCLZ_V1_I:%.*]] = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> [[TMP0]], i1 false)
// CHECK:   [[VCLZ_V2_I:%.*]] = bitcast <2 x i32> [[VCLZ_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VCLZ_V1_I]]
int32x2_t test_vcreate_u32(uint64_t a) {
  return vclz_s32((int32x2_t)vcreate_u32(a));
}

// CHECK-LABEL: @test_vcreate_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vcreate_u64(uint64_t a) {
  uint64x1_t tmp = vcreate_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_p8(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <8 x i8>
// CHECK:   [[VCNT_V_I:%.*]] = call <8 x i8> @llvm.ctpop.v8i8(<8 x i8> [[TMP0]])
// CHECK:   ret <8 x i8> [[VCNT_V_I]]
poly8x8_t test_vcreate_p8(uint64_t a) {
  return vcnt_p8(vcreate_p8(a));
}

// CHECK-LABEL: @test_vcreate_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <4 x i16>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8>
// CHECK:   [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]], <8 x i8> [[TMP3]])
// CHECK:   [[TMP4:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP4]]
poly16x4_t test_vcreate_p16(uint64_t a) {
  poly16x4_t tmp = vcreate_p16(a);
  return vbsl_p16((uint16x4_t)tmp, tmp, tmp);
}

// CHECK-LABEL: @test_vcreate_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[TMP0]], [[TMP0]]
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vcreate_s64(uint64_t a) {
  int64x1_t tmp = vcreate_s64(a);
  return vadd_s64(tmp, tmp);
}
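
// vcreate reinterprets a scalar uint64_t as a 64-bit vector, which is a
// single bitcast in IR. Most of the tests above feed the vcreate result into
// a second operation (vclz, vcnt, vbsl, vadd), so the bitcast appears as an
// operand of the instruction being checked.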

// CHECK-LABEL: @test_vcvt_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_F16_F321_I:%.*]] = call <4 x i16> @llvm.arm.neon.vcvtfp2hf(<4 x float> %a)
// CHECK:   [[VCVT_F16_F322_I:%.*]] = bitcast <4 x i16> [[VCVT_F16_F321_I]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[VCVT_F16_F322_I]] to <4 x half>
// CHECK:   ret <4 x half> [[TMP1]]
float16x4_t test_vcvt_f16_f32(float32x4_t a) {
  return vcvt_f16_f32(a);
}

// CHECK-LABEL: @test_vcvt_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = sitofp <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_s32(int32x2_t a) {
  return vcvt_f32_s32(a);
}

// CHECK-LABEL: @test_vcvt_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_I:%.*]] = uitofp <2 x i32> %a to <2 x float>
// CHECK:   ret <2 x float> [[VCVT_I]]
float32x2_t test_vcvt_f32_u32(uint32x2_t a) {
  return vcvt_f32_u32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_I:%.*]] = sitofp <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_s32(int32x4_t a) {
  return vcvtq_f32_s32(a);
}

// CHECK-LABEL: @test_vcvtq_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_I:%.*]] = uitofp <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[VCVT_I]]
float32x4_t test_vcvtq_f32_u32(uint32x4_t a) {
  return vcvtq_f32_u32(a);
}

// CHECK-LABEL: @test_vcvt_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   [[VCVT_F32_F16_I:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VCVT_F32_F161_I:%.*]] = call <4 x float> @llvm.arm.neon.vcvthf2fp(<4 x i16> [[VCVT_F32_F16_I]])
// CHECK:   [[VCVT_F32_F162_I:%.*]] = bitcast <4 x float> [[VCVT_F32_F161_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VCVT_F32_F161_I]]
float32x4_t test_vcvt_f32_f16(float16x4_t a) {
  return vcvt_f32_f16(a);
}
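
// Lane-count-preserving conversions between integers and float are plain
// sitofp/uitofp (and, further down, fptosi) instructions, whereas the
// half <-> single conversions are emitted as calls to the dedicated
// @llvm.arm.neon.vcvtfp2hf and @llvm.arm.neon.vcvthf2fp intrinsics rather
// than as fptrunc/fpext.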

// CHECK-LABEL: @test_vcvt_n_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK:   ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_s32(int32x2_t a) {
  return vcvt_n_f32_s32(a, 1);
}

// CHECK-LABEL: @test_vcvt_n_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x float> @llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32(<2 x i32> [[VCVT_N]], i32 1)
// CHECK:   ret <2 x float> [[VCVT_N1]]
float32x2_t test_vcvt_n_f32_u32(uint32x2_t a) {
  return vcvt_n_f32_u32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK:   ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_s32(int32x4_t a) {
  return vcvtq_n_f32_s32(a, 3);
}

// CHECK-LABEL: @test_vcvtq_n_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x float> @llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32(<4 x i32> [[VCVT_N]], i32 3)
// CHECK:   ret <4 x float> [[VCVT_N1]]
float32x4_t test_vcvtq_n_f32_u32(uint32x4_t a) {
  return vcvtq_n_f32_u32(a, 3);
}

// CHECK-LABEL: @test_vcvt_n_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK:   ret <2 x i32> [[VCVT_N1]]
int32x2_t test_vcvt_n_s32_f32(float32x2_t a) {
  return vcvt_n_s32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK:   ret <4 x i32> [[VCVT_N1]]
int32x4_t test_vcvtq_n_s32_f32(float32x4_t a) {
  return vcvtq_n_s32_f32(a, 3);
}

// CHECK-LABEL: @test_vcvt_n_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32(<2 x float> [[VCVT_N]], i32 1)
// CHECK:   ret <2 x i32> [[VCVT_N1]]
uint32x2_t test_vcvt_n_u32_f32(float32x2_t a) {
  return vcvt_n_u32_f32(a, 1);
}

// CHECK-LABEL: @test_vcvtq_n_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VCVT_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[VCVT_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32(<4 x float> [[VCVT_N]], i32 3)
// CHECK:   ret <4 x i32> [[VCVT_N1]]
uint32x4_t test_vcvtq_n_u32_f32(float32x4_t a) {
  return vcvtq_n_u32_f32(a, 3);
}
2432 // CHECK-LABEL: @test_vcvt_s32_f32(
2433 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
2434 // CHECK:   [[VCVT_I:%.*]] = fptosi <2 x float> %a to <2 x i32>
2435 // CHECK:   ret <2 x i32> [[VCVT_I]]
test_vcvt_s32_f32(float32x2_t a)2436 int32x2_t test_vcvt_s32_f32(float32x2_t a) {
2437   return vcvt_s32_f32(a);
2438 }
2439 
2440 // CHECK-LABEL: @test_vcvtq_s32_f32(
2441 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
2442 // CHECK:   [[VCVT_I:%.*]] = fptosi <4 x float> %a to <4 x i32>
2443 // CHECK:   ret <4 x i32> [[VCVT_I]]
test_vcvtq_s32_f32(float32x4_t a)2444 int32x4_t test_vcvtq_s32_f32(float32x4_t a) {
2445   return vcvtq_s32_f32(a);
2446 }
2447 
2448 // CHECK-LABEL: @test_vcvt_u32_f32(
2449 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
2450 // CHECK:   [[VCVT_I:%.*]] = fptoui <2 x float> %a to <2 x i32>
2451 // CHECK:   ret <2 x i32> [[VCVT_I]]
test_vcvt_u32_f32(float32x2_t a)2452 uint32x2_t test_vcvt_u32_f32(float32x2_t a) {
2453   return vcvt_u32_f32(a);
2454 }
2455 
2456 // CHECK-LABEL: @test_vcvtq_u32_f32(
2457 // CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
2458 // CHECK:   [[VCVT_I:%.*]] = fptoui <4 x float> %a to <4 x i32>
2459 // CHECK:   ret <4 x i32> [[VCVT_I]]
test_vcvtq_u32_f32(float32x4_t a)2460 uint32x4_t test_vcvtq_u32_f32(float32x4_t a) {
2461   return vcvtq_u32_f32(a);
2462 }
2463 
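// vdup_lane_* broadcasts one lane of a 64-bit vector across the result; the
// IR is a shufflevector of the source with itself whose mask repeats the
// requested lane index. Each test below picks the highest valid lane.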
// CHECK-LABEL: @test_vdup_lane_u8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE]]
uint8x8_t test_vdup_lane_u8(uint8x8_t a) {
  return vdup_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i16> [[LANE]]
uint16x4_t test_vdup_lane_u16(uint16x4_t a) {
  return vdup_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[LANE]]
uint32x2_t test_vdup_lane_u32(uint32x2_t a) {
  return vdup_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_s8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE]]
int8x8_t test_vdup_lane_s8(int8x8_t a) {
  return vdup_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i16> [[LANE]]
int16x4_t test_vdup_lane_s16(int16x4_t a) {
  return vdup_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[LANE]]
int32x2_t test_vdup_lane_s32(int32x2_t a) {
  return vdup_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_p8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE]]
poly8x8_t test_vdup_lane_p8(poly8x8_t a) {
  return vdup_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdup_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <4 x i16> [[LANE]]
poly16x4_t test_vdup_lane_p16(poly16x4_t a) {
  return vdup_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdup_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   ret <2 x float> [[LANE]]
float32x2_t test_vdup_lane_f32(float32x2_t a) {
  return vdup_lane_f32(a, 1);
}

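// The vdupq_lane_* forms take the same 64-bit input but widen the splat to a
// 128-bit result, so the shuffle mask repeats the lane index twice as many
// times.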
// CHECK-LABEL: @test_vdupq_lane_u8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <16 x i8> [[SHUFFLE]]
uint8x16_t test_vdupq_lane_u8(uint8x8_t a) {
  return vdupq_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <8 x i16> [[LANE]]
uint16x8_t test_vdupq_lane_u16(uint16x4_t a) {
  return vdupq_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[LANE]]
uint32x4_t test_vdupq_lane_u32(uint32x2_t a) {
  return vdupq_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_s8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <16 x i8> [[SHUFFLE]]
int8x16_t test_vdupq_lane_s8(int8x8_t a) {
  return vdupq_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <8 x i16> [[LANE]]
int16x8_t test_vdupq_lane_s16(int16x4_t a) {
  return vdupq_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[LANE]]
int32x4_t test_vdupq_lane_s32(int32x2_t a) {
  return vdupq_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vdupq_lane_p8(
// CHECK:   [[SHUFFLE:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
// CHECK:   ret <16 x i8> [[SHUFFLE]]
poly8x16_t test_vdupq_lane_p8(poly8x8_t a) {
  return vdupq_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vdupq_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   ret <8 x i16> [[LANE]]
poly16x8_t test_vdupq_lane_p16(poly16x4_t a) {
  return vdupq_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vdupq_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x float> [[LANE]]
float32x4_t test_vdupq_lane_f32(float32x2_t a) {
  return vdupq_lane_f32(a, 1);
}

// CHECK-LABEL: @test_vdup_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[LANE]]
int64x1_t test_vdup_lane_s64(int64x1_t a) {
  return vdup_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdup_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[LANE]]
uint64x1_t test_vdup_lane_u64(uint64x1_t a) {
  return vdup_lane_u64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i64> [[LANE]]
int64x2_t test_vdupq_lane_s64(int64x1_t a) {
  return vdupq_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vdupq_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i64> [[LANE]]
uint64x2_t test_vdupq_lane_u64(uint64x1_t a) {
  return vdupq_lane_u64(a, 0);
}

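// vdup_n_* splats a scalar instead of a lane: codegen builds the vector with
// a chain of insertelement instructions, one per lane.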
// CHECK-LABEL: @test_vdup_n_u8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vdup_n_u8(uint8_t a) {
  return vdup_n_u8(a);
}

// CHECK-LABEL: @test_vdup_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vdup_n_u16(uint16_t a) {
  return vdup_n_u16(a);
}

// CHECK-LABEL: @test_vdup_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vdup_n_u32(uint32_t a) {
  return vdup_n_u32(a);
}

// CHECK-LABEL: @test_vdup_n_s8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vdup_n_s8(int8_t a) {
  return vdup_n_s8(a);
}

// CHECK-LABEL: @test_vdup_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vdup_n_s16(int16_t a) {
  return vdup_n_s16(a);
}

// CHECK-LABEL: @test_vdup_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vdup_n_s32(int32_t a) {
  return vdup_n_s32(a);
}

// CHECK-LABEL: @test_vdup_n_p8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vdup_n_p8(poly8_t a) {
  return vdup_n_p8(a);
}

// CHECK-LABEL: @test_vdup_n_p16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vdup_n_p16(poly16_t a) {
  return vdup_n_p16(a);
}

// CHECK-LABEL: @test_vdup_n_f16(
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK:   [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK:   [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK:   [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK:   ret <4 x half> [[VECINIT3]]
float16x4_t test_vdup_n_f16(float16_t *a) {
  return vdup_n_f16(*a);
}

// CHECK-LABEL: @test_vdup_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
// CHECK:   ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vdup_n_f32(float32_t a) {
  return vdup_n_f32(a);
}

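// The 128-bit vdupq_n_* forms follow the same insertelement pattern, just
// with twice as many lanes.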
// CHECK-LABEL: @test_vdupq_n_u8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vdupq_n_u8(uint8_t a) {
  return vdupq_n_u8(a);
}

// CHECK-LABEL: @test_vdupq_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vdupq_n_u16(uint16_t a) {
  return vdupq_n_u16(a);
}

// CHECK-LABEL: @test_vdupq_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vdupq_n_u32(uint32_t a) {
  return vdupq_n_u32(a);
}

// CHECK-LABEL: @test_vdupq_n_s8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vdupq_n_s8(int8_t a) {
  return vdupq_n_s8(a);
}

// CHECK-LABEL: @test_vdupq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vdupq_n_s16(int16_t a) {
  return vdupq_n_s16(a);
}

// CHECK-LABEL: @test_vdupq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vdupq_n_s32(int32_t a) {
  return vdupq_n_s32(a);
}

// CHECK-LABEL: @test_vdupq_n_p8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vdupq_n_p8(poly8_t a) {
  return vdupq_n_p8(a);
}

// CHECK-LABEL: @test_vdupq_n_p16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vdupq_n_p16(poly16_t a) {
  return vdupq_n_p16(a);
}

// CHECK-LABEL: @test_vdupq_n_f16(
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK:   [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK:   [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK:   [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK:   [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK:   [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK:   [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK:   [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK:   ret <8 x half> [[VECINIT7]]
float16x8_t test_vdupq_n_f16(float16_t *a) {
  return vdupq_n_f16(*a);
}

// CHECK-LABEL: @test_vdupq_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK:   ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vdupq_n_f32(float32_t a) {
  return vdupq_n_f32(a);
}

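// The 64-bit element cases below add the dup result to itself before
// returning, presumably so the splat feeds a genuine vector use instead of
// being returned directly; the extra insert for lane 1 folds away in the
// <1 x i64> cases.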
// CHECK-LABEL: @test_vdup_n_s64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_s64(int64_t a) {
  int64x1_t tmp = vdup_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdup_n_u64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vdup_n_u64(uint64_t a) {
  int64x1_t tmp = (int64x1_t)vdup_n_u64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdupq_n_s64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vdupq_n_s64(int64_t a) {
  int64x2_t tmp = vdupq_n_s64(a);
  return vaddq_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vdupq_n_u64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> [[VECINIT1_I]], [[VECINIT1_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vdupq_n_u64(uint64_t a) {
  uint64x2_t tmp = vdupq_n_u64(a);
  return vaddq_u64(tmp, tmp);
}

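// veor is plain bitwise exclusive-or, so every variant lowers to a single IR
// xor regardless of element type or width.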
// CHECK-LABEL: @test_veor_s8(
// CHECK:   [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[XOR_I]]
int8x8_t test_veor_s8(int8x8_t a, int8x8_t b) {
  return veor_s8(a, b);
}

// CHECK-LABEL: @test_veor_s16(
// CHECK:   [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[XOR_I]]
int16x4_t test_veor_s16(int16x4_t a, int16x4_t b) {
  return veor_s16(a, b);
}

// CHECK-LABEL: @test_veor_s32(
// CHECK:   [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[XOR_I]]
int32x2_t test_veor_s32(int32x2_t a, int32x2_t b) {
  return veor_s32(a, b);
}

// CHECK-LABEL: @test_veor_s64(
// CHECK:   [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[XOR_I]]
int64x1_t test_veor_s64(int64x1_t a, int64x1_t b) {
  return veor_s64(a, b);
}

// CHECK-LABEL: @test_veor_u8(
// CHECK:   [[XOR_I:%.*]] = xor <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[XOR_I]]
uint8x8_t test_veor_u8(uint8x8_t a, uint8x8_t b) {
  return veor_u8(a, b);
}

// CHECK-LABEL: @test_veor_u16(
// CHECK:   [[XOR_I:%.*]] = xor <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[XOR_I]]
uint16x4_t test_veor_u16(uint16x4_t a, uint16x4_t b) {
  return veor_u16(a, b);
}

// CHECK-LABEL: @test_veor_u32(
// CHECK:   [[XOR_I:%.*]] = xor <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[XOR_I]]
uint32x2_t test_veor_u32(uint32x2_t a, uint32x2_t b) {
  return veor_u32(a, b);
}

// CHECK-LABEL: @test_veor_u64(
// CHECK:   [[XOR_I:%.*]] = xor <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[XOR_I]]
uint64x1_t test_veor_u64(uint64x1_t a, uint64x1_t b) {
  return veor_u64(a, b);
}

// CHECK-LABEL: @test_veorq_s8(
// CHECK:   [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[XOR_I]]
int8x16_t test_veorq_s8(int8x16_t a, int8x16_t b) {
  return veorq_s8(a, b);
}

// CHECK-LABEL: @test_veorq_s16(
// CHECK:   [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[XOR_I]]
int16x8_t test_veorq_s16(int16x8_t a, int16x8_t b) {
  return veorq_s16(a, b);
}

// CHECK-LABEL: @test_veorq_s32(
// CHECK:   [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[XOR_I]]
int32x4_t test_veorq_s32(int32x4_t a, int32x4_t b) {
  return veorq_s32(a, b);
}

// CHECK-LABEL: @test_veorq_s64(
// CHECK:   [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[XOR_I]]
int64x2_t test_veorq_s64(int64x2_t a, int64x2_t b) {
  return veorq_s64(a, b);
}

// CHECK-LABEL: @test_veorq_u8(
// CHECK:   [[XOR_I:%.*]] = xor <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[XOR_I]]
uint8x16_t test_veorq_u8(uint8x16_t a, uint8x16_t b) {
  return veorq_u8(a, b);
}

// CHECK-LABEL: @test_veorq_u16(
// CHECK:   [[XOR_I:%.*]] = xor <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[XOR_I]]
uint16x8_t test_veorq_u16(uint16x8_t a, uint16x8_t b) {
  return veorq_u16(a, b);
}

// CHECK-LABEL: @test_veorq_u32(
// CHECK:   [[XOR_I:%.*]] = xor <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[XOR_I]]
uint32x4_t test_veorq_u32(uint32x4_t a, uint32x4_t b) {
  return veorq_u32(a, b);
}

// CHECK-LABEL: @test_veorq_u64(
// CHECK:   [[XOR_I:%.*]] = xor <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[XOR_I]]
uint64x2_t test_veorq_u64(uint64x2_t a, uint64x2_t b) {
  return veorq_u64(a, b);
}

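// vext_*(a, b, n) treats a and b as one concatenated vector and extracts a
// result-sized window starting at lane n, so the shuffle mask counts up from
// n (e.g. <7, 8, ..., 14> for the 8-byte case below).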
// CHECK-LABEL: @test_vext_s8(
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i8> [[VEXT]]
int8x8_t test_vext_s8(int8x8_t a, int8x8_t b) {
  return vext_s8(a, b, 7);
}

// CHECK-LABEL: @test_vext_u8(
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i8> [[VEXT]]
uint8x8_t test_vext_u8(uint8x8_t a, uint8x8_t b) {
  return vext_u8(a, b, 7);
}

// CHECK-LABEL: @test_vext_p8(
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i8> [[VEXT]]
poly8x8_t test_vext_p8(poly8x8_t a, poly8x8_t b) {
  return vext_p8(a, b, 7);
}

// CHECK-LABEL: @test_vext_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i16> [[VEXT]]
int16x4_t test_vext_s16(int16x4_t a, int16x4_t b) {
  return vext_s16(a, b, 3);
}

// CHECK-LABEL: @test_vext_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i16> [[VEXT]]
uint16x4_t test_vext_u16(uint16x4_t a, uint16x4_t b) {
  return vext_u16(a, b, 3);
}

// CHECK-LABEL: @test_vext_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i16> [[VEXT]]
poly16x4_t test_vext_p16(poly16x4_t a, poly16x4_t b) {
  return vext_p16(a, b, 3);
}

// CHECK-LABEL: @test_vext_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x i32> [[VEXT]]
int32x2_t test_vext_s32(int32x2_t a, int32x2_t b) {
  return vext_s32(a, b, 1);
}

// CHECK-LABEL: @test_vext_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x i32> [[VEXT]]
uint32x2_t test_vext_u32(uint32x2_t a, uint32x2_t b) {
  return vext_u32(a, b, 1);
}

// CHECK-LABEL: @test_vext_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[VEXT]]
int64x1_t test_vext_s64(int64x1_t a, int64x1_t b) {
  return vext_s64(a, b, 0);
}

// CHECK-LABEL: @test_vext_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[VEXT]]
uint64x1_t test_vext_u64(uint64x1_t a, uint64x1_t b) {
  return vext_u64(a, b, 0);
}

// CHECK-LABEL: @test_vext_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x float> [[VEXT]]
float32x2_t test_vext_f32(float32x2_t a, float32x2_t b) {
  return vext_f32(a, b, 1);
}

// CHECK-LABEL: @test_vextq_s8(
// CHECK:   [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK:   ret <16 x i8> [[VEXT]]
int8x16_t test_vextq_s8(int8x16_t a, int8x16_t b) {
  return vextq_s8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_u8(
// CHECK:   [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK:   ret <16 x i8> [[VEXT]]
uint8x16_t test_vextq_u8(uint8x16_t a, uint8x16_t b) {
  return vextq_u8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_p8(
// CHECK:   [[VEXT:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30>
// CHECK:   ret <16 x i8> [[VEXT]]
poly8x16_t test_vextq_p8(poly8x16_t a, poly8x16_t b) {
  return vextq_p8(a, b, 15);
}

// CHECK-LABEL: @test_vextq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i16> [[VEXT]]
int16x8_t test_vextq_s16(int16x8_t a, int16x8_t b) {
  return vextq_s16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i16> [[VEXT]]
uint16x8_t test_vextq_u16(uint16x8_t a, uint16x8_t b) {
  return vextq_u16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
// CHECK:   ret <8 x i16> [[VEXT]]
poly16x8_t test_vextq_p16(poly16x8_t a, poly16x8_t b) {
  return vextq_p16(a, b, 7);
}

// CHECK-LABEL: @test_vextq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i32> [[VEXT]]
int32x4_t test_vextq_s32(int32x4_t a, int32x4_t b) {
  return vextq_s32(a, b, 3);
}

// CHECK-LABEL: @test_vextq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x i32> [[VEXT]]
uint32x4_t test_vextq_u32(uint32x4_t a, uint32x4_t b) {
  return vextq_u32(a, b, 3);
}

// CHECK-LABEL: @test_vextq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x i64> [[VEXT]]
int64x2_t test_vextq_s64(int64x2_t a, int64x2_t b) {
  return vextq_s64(a, b, 1);
}

// CHECK-LABEL: @test_vextq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
// CHECK:   ret <2 x i64> [[VEXT]]
uint64x2_t test_vextq_u64(uint64x2_t a, uint64x2_t b) {
  return vextq_u64(a, b, 1);
}

// CHECK-LABEL: @test_vextq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[VEXT:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
// CHECK:   ret <4 x float> [[VEXT]]
float32x4_t test_vextq_f32(float32x4_t a, float32x4_t b) {
  return vextq_f32(a, b, 3);
}

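// vfma maps directly to the generic llvm.fma intrinsic (fused multiply-add,
// single rounding); vfms is the same call with the first multiplicand
// negated via fneg.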
// CHECK-LABEL: @test_vfma_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> %b, <2 x float> %c, <2 x float> %a)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfma_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfma_f32(a, b, c);
}

// CHECK-LABEL: @test_vfmaq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %b, <4 x float> %c, <4 x float> %a)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmaq_f32(a, b, c);
}

// CHECK-LABEL: @test_vfms_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <2 x float> %b
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> [[SUB_I]] to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %c to <8 x i8>
// CHECK:   [[TMP3:%.*]] = call <2 x float> @llvm.fma.v2f32(<2 x float> [[SUB_I]], <2 x float> %c, <2 x float> %a)
// CHECK:   ret <2 x float> [[TMP3]]
float32x2_t test_vfms_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vfms_f32(a, b, c);
}

// CHECK-LABEL: @test_vfmsq_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <4 x float> %b
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> [[SUB_I]] to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %c to <16 x i8>
// CHECK:   [[TMP3:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[SUB_I]], <4 x float> %c, <4 x float> %a)
// CHECK:   ret <4 x float> [[TMP3]]
float32x4_t test_vfmsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vfmsq_f32(a, b, c);
}

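// vget_high_* returns the upper half of a 128-bit vector as a 64-bit vector;
// codegen is a shufflevector selecting the high lane indices.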
// CHECK-LABEL: @test_vget_high_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_high_s8(int8x16_t a) {
  return vget_high_s8(a);
}

// CHECK-LABEL: @test_vget_high_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_high_s16(int16x8_t a) {
  return vget_high_s16(a);
}

// CHECK-LABEL: @test_vget_high_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_high_s32(int32x4_t a) {
  return vget_high_s32(a);
}

// CHECK-LABEL: @test_vget_high_s64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK:   ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_high_s64(int64x2_t a) {
  return vget_high_s64(a);
}

// CHECK-LABEL: @test_vget_high_f16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_high_f16(float16x8_t a) {
  return vget_high_f16(a);
}

// CHECK-LABEL: @test_vget_high_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_high_f32(float32x4_t a) {
  return vget_high_f32(a);
}

// CHECK-LABEL: @test_vget_high_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_high_u8(uint8x16_t a) {
  return vget_high_u8(a);
}

// CHECK-LABEL: @test_vget_high_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_high_u16(uint16x8_t a) {
  return vget_high_u16(a);
}

// CHECK-LABEL: @test_vget_high_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_high_u32(uint32x4_t a) {
  return vget_high_u32(a);
}

// CHECK-LABEL: @test_vget_high_u64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1>
// CHECK:   ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_high_u64(uint64x2_t a) {
  return vget_high_u64(a);
}

// CHECK-LABEL: @test_vget_high_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_high_p8(poly8x16_t a) {
  return vget_high_p8(a);
}

// CHECK-LABEL: @test_vget_high_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_high_p16(poly16x8_t a) {
  return vget_high_p16(a);
}

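// vget_lane_* is a single extractelement. The f16 case is the exception: the
// half lane is stored and reloaded through i16/half pointers to reinterpret
// its bits, then widened to float with fpext.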
// CHECK-LABEL: @test_vget_lane_u8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
uint8_t test_vget_lane_u8(uint8x8_t a) {
  return vget_lane_u8(a, 7);
}

// CHECK-LABEL: @test_vget_lane_u16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK:   ret i16 [[VGET_LANE]]
uint16_t test_vget_lane_u16(uint16x4_t a) {
  return vget_lane_u16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_u32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK:   ret i32 [[VGET_LANE]]
uint32_t test_vget_lane_u32(uint32x2_t a) {
  return vget_lane_u32(a, 1);
}

// CHECK-LABEL: @test_vget_lane_s8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
int8_t test_vget_lane_s8(int8x8_t a) {
  return vget_lane_s8(a, 7);
}

// CHECK-LABEL: @test_vget_lane_s16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK:   ret i16 [[VGET_LANE]]
int16_t test_vget_lane_s16(int16x4_t a) {
  return vget_lane_s16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_s32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1
// CHECK:   ret i32 [[VGET_LANE]]
int32_t test_vget_lane_s32(int32x2_t a) {
  return vget_lane_s32(a, 1);
}

// CHECK-LABEL: @test_vget_lane_p8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i8> %a, i32 7
// CHECK:   ret i8 [[VGET_LANE]]
poly8_t test_vget_lane_p8(poly8x8_t a) {
  return vget_lane_p8(a, 7);
}

// CHECK-LABEL: @test_vget_lane_p16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3
// CHECK:   ret i16 [[VGET_LANE]]
poly16_t test_vget_lane_p16(poly16x4_t a) {
  return vget_lane_p16(a, 3);
}

// CHECK-LABEL: @test_vget_lane_f32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1
// CHECK:   ret float [[VGET_LANE]]
float32_t test_vget_lane_f32(float32x2_t a) {
  return vget_lane_f32(a, 1);
}

// CHECK-LABEL: @test_vget_lane_f16(
// CHECK:   [[__REINT_242:%.*]] = alloca <4 x half>, align 8
// CHECK:   [[__REINT1_242:%.*]] = alloca i16, align 2
// CHECK:   store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>*
// CHECK:   [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1
// CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2
// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half*
// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK:   ret float [[CONV]]
float32_t test_vget_lane_f16(float16x4_t a) {
  return vget_lane_f16(a, 1);
}

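// vgetq_lane: the same lane extract on 128-bit (q-register) vectors, again a
// single extractelement, with the _f16 variant taking the same round trip
// through memory before widening to float.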
// CHECK-LABEL: @test_vgetq_lane_u8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGET_LANE]]
uint8_t test_vgetq_lane_u8(uint8x16_t a) {
  return vgetq_lane_u8(a, 15);
}

// CHECK-LABEL: @test_vgetq_lane_u16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK:   ret i16 [[VGET_LANE]]
uint16_t test_vgetq_lane_u16(uint16x8_t a) {
  return vgetq_lane_u16(a, 7);
}

// CHECK-LABEL: @test_vgetq_lane_u32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK:   ret i32 [[VGET_LANE]]
uint32_t test_vgetq_lane_u32(uint32x4_t a) {
  return vgetq_lane_u32(a, 3);
}

// CHECK-LABEL: @test_vgetq_lane_s8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGET_LANE]]
int8_t test_vgetq_lane_s8(int8x16_t a) {
  return vgetq_lane_s8(a, 15);
}

// CHECK-LABEL: @test_vgetq_lane_s16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK:   ret i16 [[VGET_LANE]]
int16_t test_vgetq_lane_s16(int16x8_t a) {
  return vgetq_lane_s16(a, 7);
}

// CHECK-LABEL: @test_vgetq_lane_s32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3
// CHECK:   ret i32 [[VGET_LANE]]
int32_t test_vgetq_lane_s32(int32x4_t a) {
  return vgetq_lane_s32(a, 3);
}

// CHECK-LABEL: @test_vgetq_lane_p8(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <16 x i8> %a, i32 15
// CHECK:   ret i8 [[VGET_LANE]]
poly8_t test_vgetq_lane_p8(poly8x16_t a) {
  return vgetq_lane_p8(a, 15);
}

// CHECK-LABEL: @test_vgetq_lane_p16(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7
// CHECK:   ret i16 [[VGET_LANE]]
poly16_t test_vgetq_lane_p16(poly16x8_t a) {
  return vgetq_lane_p16(a, 7);
}

// CHECK-LABEL: @test_vgetq_lane_f32(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <4 x float> %a, i32 3
// CHECK:   ret float [[VGET_LANE]]
float32_t test_vgetq_lane_f32(float32x4_t a) {
  return vgetq_lane_f32(a, 3);
}

// CHECK-LABEL: @test_vgetq_lane_f16(
// CHECK:   [[__REINT_244:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT1_244:%.*]] = alloca i16, align 2
// CHECK:   store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>*
// CHECK:   [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16
// CHECK:   [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3
// CHECK:   store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2
// CHECK:   [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half*
// CHECK:   [[TMP5:%.*]] = load half, half* [[TMP4]], align 2
// CHECK:   [[CONV:%.*]] = fpext half [[TMP5]] to float
// CHECK:   ret float [[CONV]]
float32_t test_vgetq_lane_f16(float16x8_t a) {
  return vgetq_lane_f16(a, 3);
}

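// Lane extracts on 64-bit elements: vget_lane on a one-element vector can
// only name lane 0, while vgetq_lane on the two-element q-register types can
// pick either half.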
// CHECK-LABEL: @test_vget_lane_s64(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK:   ret i64 [[VGET_LANE]]
int64_t test_vget_lane_s64(int64x1_t a) {
  return vget_lane_s64(a, 0);
}

// CHECK-LABEL: @test_vget_lane_u64(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0
// CHECK:   ret i64 [[VGET_LANE]]
uint64_t test_vget_lane_u64(uint64x1_t a) {
  return vget_lane_u64(a, 0);
}

// CHECK-LABEL: @test_vgetq_lane_s64(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK:   ret i64 [[VGET_LANE]]
int64_t test_vgetq_lane_s64(int64x2_t a) {
  return vgetq_lane_s64(a, 1);
}

// CHECK-LABEL: @test_vgetq_lane_u64(
// CHECK:   [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1
// CHECK:   ret i64 [[VGET_LANE]]
uint64_t test_vgetq_lane_u64(uint64x2_t a) {
  return vgetq_lane_u64(a, 1);
}

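// vget_low: the complement of vget_high, selecting the low half (lanes
// 0..n/2-1) of a 128-bit vector via shufflevector. The <1 x i32>
// zeroinitializer mask in the 64-bit cases is just the canonical IR spelling
// of <i32 0>.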
// CHECK-LABEL: @test_vget_low_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vget_low_s8(int8x16_t a) {
  return vget_low_s8(a);
}

// CHECK-LABEL: @test_vget_low_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vget_low_s16(int16x8_t a) {
  return vget_low_s16(a);
}

// CHECK-LABEL: @test_vget_low_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vget_low_s32(int32x4_t a) {
  return vget_low_s32(a);
}

// CHECK-LABEL: @test_vget_low_s64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[SHUFFLE_I]]
int64x1_t test_vget_low_s64(int64x2_t a) {
  return vget_low_s64(a);
}

// CHECK-LABEL: @test_vget_low_f16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x half> [[SHUFFLE_I]]
float16x4_t test_vget_low_f16(float16x8_t a) {
  return vget_low_f16(a);
}

// CHECK-LABEL: @test_vget_low_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vget_low_f32(float32x4_t a) {
  return vget_low_f32(a);
}

// CHECK-LABEL: @test_vget_low_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vget_low_u8(uint8x16_t a) {
  return vget_low_u8(a);
}

// CHECK-LABEL: @test_vget_low_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vget_low_u16(uint16x8_t a) {
  return vget_low_u16(a);
}

// CHECK-LABEL: @test_vget_low_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vget_low_u32(uint32x4_t a) {
  return vget_low_u32(a);
}

// CHECK-LABEL: @test_vget_low_u64(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[SHUFFLE_I]]
uint64x1_t test_vget_low_u64(uint64x2_t a) {
  return vget_low_u64(a);
}

// CHECK-LABEL: @test_vget_low_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vget_low_p8(poly8x16_t a) {
  return vget_low_p8(a);
}

// CHECK-LABEL: @test_vget_low_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vget_low_p16(poly16x8_t a) {
  return vget_low_p16(a);
}

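// vhadd: element-wise halving add. Each lane computes (a + b) >> 1 in a wider
// intermediate, so the carry out of the addition is not lost; signed inputs
// map to @llvm.arm.neon.vhadds and unsigned inputs to @llvm.arm.neon.vhaddu.
// A scalar sketch of one lane of the signed byte form:
//   int8_t vhadd_one_lane(int8_t x, int8_t y) {
//     return (int8_t)(((int16_t)x + (int16_t)y) >> 1);
//   }
// The <8 x i8> bitcasts around the 16- and 32-bit calls are dead, but since
// no DCE runs after mem2reg they are still emitted and the CHECK lines match
// them.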
// CHECK-LABEL: @test_vhadd_s8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
int8x8_t test_vhadd_s8(int8x8_t a, int8x8_t b) {
  return vhadd_s8(a, b);
}

// CHECK-LABEL: @test_vhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
int16x4_t test_vhadd_s16(int16x4_t a, int16x4_t b) {
  return vhadd_s16(a, b);
}

// CHECK-LABEL: @test_vhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
int32x2_t test_vhadd_s32(int32x2_t a, int32x2_t b) {
  return vhadd_s32(a, b);
}

// CHECK-LABEL: @test_vhadd_u8(
// CHECK:   [[VHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VHADD_V_I]]
uint8x8_t test_vhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vhadd_u8(a, b);
}

// CHECK-LABEL: @test_vhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <4 x i16> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHADD_V2_I]]
uint16x4_t test_vhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vhadd_u16(a, b);
}

// CHECK-LABEL: @test_vhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VHADD_V3_I:%.*]] = bitcast <2 x i32> [[VHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHADD_V2_I]]
uint32x2_t test_vhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vhadd_u32(a, b);
}

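// vhaddq: the same halving add on full 128-bit vectors, using the v16i8,
// v8i16 and v4i32 forms of the intrinsics.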
// CHECK-LABEL: @test_vhaddq_s8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
int8x16_t test_vhaddq_s8(int8x16_t a, int8x16_t b) {
  return vhaddq_s8(a, b);
}

// CHECK-LABEL: @test_vhaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b) {
  return vhaddq_s16(a, b);
}

// CHECK-LABEL: @test_vhaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
int32x4_t test_vhaddq_s32(int32x4_t a, int32x4_t b) {
  return vhaddq_s32(a, b);
}

// CHECK-LABEL: @test_vhaddq_u8(
// CHECK:   [[VHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VHADDQ_V_I]]
uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vhaddq_u8(a, b);
}

// CHECK-LABEL: @test_vhaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHADDQ_V2_I]]
uint16x8_t test_vhaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vhaddq_u16(a, b);
}

// CHECK-LABEL: @test_vhaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VHADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHADDQ_V2_I]]
uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vhaddq_u32(a, b);
}

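// vhsub: element-wise halving subtract, (a - b) >> 1 computed in a wider
// intermediate so the borrow is not lost, with signed (vhsubs) and unsigned
// (vhsubu) intrinsic variants mirroring vhadd.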
// CHECK-LABEL: @test_vhsub_s8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
int8x8_t test_vhsub_s8(int8x8_t a, int8x8_t b) {
  return vhsub_s8(a, b);
}

// CHECK-LABEL: @test_vhsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
int16x4_t test_vhsub_s16(int16x4_t a, int16x4_t b) {
  return vhsub_s16(a, b);
}

// CHECK-LABEL: @test_vhsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
int32x2_t test_vhsub_s32(int32x2_t a, int32x2_t b) {
  return vhsub_s32(a, b);
}

// CHECK-LABEL: @test_vhsub_u8(
// CHECK:   [[VHSUB_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vhsubu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VHSUB_V_I]]
uint8x8_t test_vhsub_u8(uint8x8_t a, uint8x8_t b) {
  return vhsub_u8(a, b);
}

// CHECK-LABEL: @test_vhsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vhsubu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <4 x i16> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VHSUB_V2_I]]
uint16x4_t test_vhsub_u16(uint16x4_t a, uint16x4_t b) {
  return vhsub_u16(a, b);
}

// CHECK-LABEL: @test_vhsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VHSUB_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vhsubu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VHSUB_V3_I:%.*]] = bitcast <2 x i32> [[VHSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VHSUB_V2_I]]
uint32x2_t test_vhsub_u32(uint32x2_t a, uint32x2_t b) {
  return vhsub_u32(a, b);
}

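// vhsubq: the 128-bit halving subtract, covering the same element types.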
// CHECK-LABEL: @test_vhsubq_s8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubs.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
int8x16_t test_vhsubq_s8(int8x16_t a, int8x16_t b) {
  return vhsubq_s8(a, b);
}

// CHECK-LABEL: @test_vhsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubs.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
int16x8_t test_vhsubq_s16(int16x8_t a, int16x8_t b) {
  return vhsubq_s16(a, b);
}

// CHECK-LABEL: @test_vhsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubs.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
int32x4_t test_vhsubq_s32(int32x4_t a, int32x4_t b) {
  return vhsubq_s32(a, b);
}

// CHECK-LABEL: @test_vhsubq_u8(
// CHECK:   [[VHSUBQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vhsubu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VHSUBQ_V_I]]
uint8x16_t test_vhsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vhsubq_u8(a, b);
}

// CHECK-LABEL: @test_vhsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vhsubu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VHSUBQ_V2_I]]
uint16x8_t test_vhsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vhsubq_u16(a, b);
}

// CHECK-LABEL: @test_vhsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VHSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vhsubu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VHSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VHSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VHSUBQ_V2_I]]
uint32x4_t test_vhsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vhsubq_u32(a, b);
}

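// vld1/vld1q: load a whole vector through @llvm.arm.neon.vld1, whose trailing
// i32 operand is the alignment in bytes; the element pointer is first cast to
// i8*. Note that the 64-bit element loads use alignment 4 rather than 8,
// matching the 4-byte alignment of 64-bit types under the apcs-gnu ABI
// selected by the RUN line.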
// CHECK-LABEL: @test_vld1q_u8(
// CHECK:   [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <16 x i8> [[VLD1]]
uint8x16_t test_vld1q_u8(uint8_t const * a) {
  return vld1q_u8(a);
}

// CHECK-LABEL: @test_vld1q_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <8 x i16> [[VLD1]]
uint16x8_t test_vld1q_u16(uint16_t const * a) {
  return vld1q_u16(a);
}

// CHECK-LABEL: @test_vld1q_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <4 x i32> [[VLD1]]
uint32x4_t test_vld1q_u32(uint32_t const * a) {
  return vld1q_u32(a);
}

// CHECK-LABEL: @test_vld1q_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x i64> [[VLD1]]
uint64x2_t test_vld1q_u64(uint64_t const * a) {
  return vld1q_u64(a);
}

// CHECK-LABEL: @test_vld1q_s8(
// CHECK:   [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <16 x i8> [[VLD1]]
int8x16_t test_vld1q_s8(int8_t const * a) {
  return vld1q_s8(a);
}

// CHECK-LABEL: @test_vld1q_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <8 x i16> [[VLD1]]
int16x8_t test_vld1q_s16(int16_t const * a) {
  return vld1q_s16(a);
}

// CHECK-LABEL: @test_vld1q_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i32> @llvm.arm.neon.vld1.v4i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <4 x i32> [[VLD1]]
int32x4_t test_vld1q_s32(int32_t const * a) {
  return vld1q_s32(a);
}

// CHECK-LABEL: @test_vld1q_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x i64> @llvm.arm.neon.vld1.v2i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x i64> [[VLD1]]
int64x2_t test_vld1q_s64(int64_t const * a) {
  return vld1q_s64(a);
}

// CHECK-LABEL: @test_vld1q_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <8 x half> @llvm.arm.neon.vld1.v8f16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <8 x half> [[VLD1]]
float16x8_t test_vld1q_f16(float16_t const * a) {
  return vld1q_f16(a);
}

// CHECK-LABEL: @test_vld1q_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x float> @llvm.arm.neon.vld1.v4f32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <4 x float> [[VLD1]]
float32x4_t test_vld1q_f32(float32_t const * a) {
  return vld1q_f32(a);
}

// CHECK-LABEL: @test_vld1q_p8(
// CHECK:   [[VLD1:%.*]] = call <16 x i8> @llvm.arm.neon.vld1.v16i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <16 x i8> [[VLD1]]
poly8x16_t test_vld1q_p8(poly8_t const * a) {
  return vld1q_p8(a);
}

// CHECK-LABEL: @test_vld1q_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <8 x i16> @llvm.arm.neon.vld1.v8i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <8 x i16> [[VLD1]]
poly16x8_t test_vld1q_p16(poly16_t const * a) {
  return vld1q_p16(a);
}

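// The d-register vld1 forms are identical in shape, just with the v8i8,
// v4i16, v2i32, v1i64, v4f16 and v2f32 variants of the intrinsic.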
// CHECK-LABEL: @test_vld1_u8(
// CHECK:   [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <8 x i8> [[VLD1]]
uint8x8_t test_vld1_u8(uint8_t const * a) {
  return vld1_u8(a);
}

// CHECK-LABEL: @test_vld1_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <4 x i16> [[VLD1]]
uint16x4_t test_vld1_u16(uint16_t const * a) {
  return vld1_u16(a);
}

// CHECK-LABEL: @test_vld1_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x i32> [[VLD1]]
uint32x2_t test_vld1_u32(uint32_t const * a) {
  return vld1_u32(a);
}

// CHECK-LABEL: @test_vld1_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <1 x i64> [[VLD1]]
uint64x1_t test_vld1_u64(uint64_t const * a) {
  return vld1_u64(a);
}

// CHECK-LABEL: @test_vld1_s8(
// CHECK:   [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <8 x i8> [[VLD1]]
int8x8_t test_vld1_s8(int8_t const * a) {
  return vld1_s8(a);
}

// CHECK-LABEL: @test_vld1_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <4 x i16> [[VLD1]]
int16x4_t test_vld1_s16(int16_t const * a) {
  return vld1_s16(a);
}

// CHECK-LABEL: @test_vld1_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x i32> @llvm.arm.neon.vld1.v2i32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x i32> [[VLD1]]
int32x2_t test_vld1_s32(int32_t const * a) {
  return vld1_s32(a);
}

// CHECK-LABEL: @test_vld1_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <1 x i64> [[VLD1]]
int64x1_t test_vld1_s64(int64_t const * a) {
  return vld1_s64(a);
}

// CHECK-LABEL: @test_vld1_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x half> @llvm.arm.neon.vld1.v4f16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <4 x half> [[VLD1]]
float16x4_t test_vld1_f16(float16_t const * a) {
  return vld1_f16(a);
}

// CHECK-LABEL: @test_vld1_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <2 x float> @llvm.arm.neon.vld1.v2f32.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   ret <2 x float> [[VLD1]]
float32x2_t test_vld1_f32(float32_t const * a) {
  return vld1_f32(a);
}

// CHECK-LABEL: @test_vld1_p8(
// CHECK:   [[VLD1:%.*]] = call <8 x i8> @llvm.arm.neon.vld1.v8i8.p0i8(i8* %a, i32 1)
// CHECK:   ret <8 x i8> [[VLD1]]
poly8x8_t test_vld1_p8(poly8_t const * a) {
  return vld1_p8(a);
}

// CHECK-LABEL: @test_vld1_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD1:%.*]] = call <4 x i16> @llvm.arm.neon.vld1.v4i16.p0i8(i8* [[TMP0]], i32 2)
// CHECK:   ret <4 x i16> [[VLD1]]
poly16x4_t test_vld1_p16(poly16_t const * a) {
  return vld1_p16(a);
}

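// vld1q_dup: load one element and broadcast it to every lane. The lowering is
// a scalar load, an insertelement into lane 0 of an undef vector, and a
// shufflevector with an all-zero mask that splats lane 0 across the result.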
// CHECK-LABEL: @test_vld1q_dup_u8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK:   ret <16 x i8> [[LANE]]
uint8x16_t test_vld1q_dup_u8(uint8_t const * a) {
  return vld1q_dup_u8(a);
}

// CHECK-LABEL: @test_vld1q_dup_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i16> [[LANE]]
uint16x8_t test_vld1q_dup_u16(uint16_t const * a) {
  return vld1q_dup_u16(a);
}

// CHECK-LABEL: @test_vld1q_dup_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i32> [[LANE]]
uint32x4_t test_vld1q_dup_u32(uint32_t const * a) {
  return vld1q_dup_u32(a);
}

// CHECK-LABEL: @test_vld1q_dup_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i64> [[LANE]]
uint64x2_t test_vld1q_dup_u64(uint64_t const * a) {
  return vld1q_dup_u64(a);
}

// CHECK-LABEL: @test_vld1q_dup_s8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK:   ret <16 x i8> [[LANE]]
int8x16_t test_vld1q_dup_s8(int8_t const * a) {
  return vld1q_dup_s8(a);
}

// CHECK-LABEL: @test_vld1q_dup_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i16> [[LANE]]
int16x8_t test_vld1q_dup_s16(int16_t const * a) {
  return vld1q_dup_s16(a);
}

// CHECK-LABEL: @test_vld1q_dup_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i32> undef, i32 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i32> [[LANE]]
int32x4_t test_vld1q_dup_s32(int32_t const * a) {
  return vld1q_dup_s32(a);
}

// CHECK-LABEL: @test_vld1q_dup_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x i64> undef, i64 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i64> [[TMP3]], <2 x i64> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i64> [[LANE]]
int64x2_t test_vld1q_dup_s64(int64_t const * a) {
  return vld1q_dup_s64(a);
}

// CHECK-LABEL: @test_vld1q_dup_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <8 x half> undef, half [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x half> [[TMP3]], <8 x half> [[TMP3]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x half> [[LANE]]
float16x8_t test_vld1q_dup_f16(float16_t const * a) {
  return vld1q_dup_f16(a);
}

// CHECK-LABEL: @test_vld1q_dup_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x float> [[LANE]]
float32x4_t test_vld1q_dup_f32(float32_t const * a) {
  return vld1q_dup_f32(a);
}

// CHECK-LABEL: @test_vld1q_dup_p8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <16 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> [[TMP1]], <16 x i32> zeroinitializer
// CHECK:   ret <16 x i8> [[LANE]]
poly8x16_t test_vld1q_dup_p8(poly8_t const * a) {
  return vld1q_dup_p8(a);
}

// CHECK-LABEL: @test_vld1q_dup_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <8 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> [[TMP3]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i16> [[LANE]]
poly16x8_t test_vld1q_dup_p16(poly16_t const * a) {
  return vld1q_dup_p16(a);
}

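// vld1_dup: the same load-and-splat pattern for 64-bit destination vectors.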
// CHECK-LABEL: @test_vld1_dup_u8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i8> [[LANE]]
uint8x8_t test_vld1_dup_u8(uint8_t const * a) {
  return vld1_dup_u8(a);
}

// CHECK-LABEL: @test_vld1_dup_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i16> [[LANE]]
uint16x4_t test_vld1_dup_u16(uint16_t const * a) {
  return vld1_dup_u16(a);
}

// CHECK-LABEL: @test_vld1_dup_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i32> [[LANE]]
uint32x2_t test_vld1_dup_u32(uint32_t const * a) {
  return vld1_dup_u32(a);
}

// CHECK-LABEL: @test_vld1_dup_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[LANE]]
uint64x1_t test_vld1_dup_u64(uint64_t const * a) {
  return vld1_dup_u64(a);
}

// CHECK-LABEL: @test_vld1_dup_s8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i8> [[LANE]]
int8x8_t test_vld1_dup_s8(int8_t const * a) {
  return vld1_dup_s8(a);
}

// CHECK-LABEL: @test_vld1_dup_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i16> [[LANE]]
int16x4_t test_vld1_dup_s16(int16_t const * a) {
  return vld1_dup_s16(a);
}

// CHECK-LABEL: @test_vld1_dup_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x i32> undef, i32 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x i32> [[LANE]]
int32x2_t test_vld1_dup_s32(int32_t const * a) {
  return vld1_dup_s32(a);
}

// CHECK-LABEL: @test_vld1_dup_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP2:%.*]] = load i64, i64* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <1 x i64> undef, i64 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
// CHECK:   ret <1 x i64> [[LANE]]
int64x1_t test_vld1_dup_s64(int64_t const * a) {
  return vld1_dup_s64(a);
}

// CHECK-LABEL: @test_vld1_dup_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   [[TMP2:%.*]] = load half, half* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <4 x half> undef, half [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x half> [[TMP3]], <4 x half> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x half> [[LANE]]
float16x4_t test_vld1_dup_f16(float16_t const * a) {
  return vld1_dup_f16(a);
}

// CHECK-LABEL: @test_vld1_dup_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   [[TMP2:%.*]] = load float, float* [[TMP1]], align 4
// CHECK:   [[TMP3:%.*]] = insertelement <2 x float> undef, float [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP3]], <2 x i32> zeroinitializer
// CHECK:   ret <2 x float> [[LANE]]
float32x2_t test_vld1_dup_f32(float32_t const * a) {
  return vld1_dup_f32(a);
}

// CHECK-LABEL: @test_vld1_dup_p8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[TMP1:%.*]] = insertelement <8 x i8> undef, i8 [[TMP0]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> [[TMP1]], <8 x i32> zeroinitializer
// CHECK:   ret <8 x i8> [[LANE]]
poly8x8_t test_vld1_dup_p8(poly8_t const * a) {
  return vld1_dup_p8(a);
}

// CHECK-LABEL: @test_vld1_dup_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[TMP2]], i32 0
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> [[TMP3]], <4 x i32> zeroinitializer
// CHECK:   ret <4 x i16> [[LANE]]
poly16x4_t test_vld1_dup_p16(poly16_t const * a) {
  return vld1_dup_p16(a);
}

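// vld1q_lane: load a single element into one lane of an existing vector,
// leaving the other lanes untouched; in general this is a scalar load feeding
// an insertelement. The 64-bit q-register forms are lowered differently: the
// retained lane is split out with a shufflevector, the new element is loaded
// through @llvm.arm.neon.vld1.v1i64, and the two halves are concatenated back
// into a <2 x i64>.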
// CHECK-LABEL: @test_vld1q_lane_u8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK:   ret <16 x i8> [[VLD1_LANE]]
uint8x16_t test_vld1q_lane_u8(uint8_t const * a, uint8x16_t b) {
  return vld1q_lane_u8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK:   ret <8 x i16> [[VLD1_LANE]]
uint16x8_t test_vld1q_lane_u16(uint16_t const * a, uint16x8_t b) {
  return vld1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK:   ret <4 x i32> [[VLD1_LANE]]
uint32x4_t test_vld1q_lane_u32(uint32_t const * a, uint32x4_t b) {
  return vld1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i64> [[VLD1Q_LANE]]
uint64x2_t test_vld1q_lane_u64(uint64_t const * a, uint64x2_t b) {
  return vld1q_lane_u64(a, b, 1);
}

// CHECK-LABEL: @test_vld1q_lane_s8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK:   ret <16 x i8> [[VLD1_LANE]]
int8x16_t test_vld1q_lane_s8(int8_t const * a, int8x16_t b) {
  return vld1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK:   ret <8 x i16> [[VLD1_LANE]]
int16x8_t test_vld1q_lane_s16(int16_t const * a, int16x8_t b) {
  return vld1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[TMP4]], i32 3
// CHECK:   ret <4 x i32> [[VLD1_LANE]]
int32x4_t test_vld1q_lane_s32(int32_t const * a, int32x4_t b) {
  return vld1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> zeroinitializer
// CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vld1.v1i64.p0i8(i8* [[TMP0]], i32 4)
// CHECK:   [[VLD1Q_LANE:%.*]] = shufflevector <1 x i64> [[TMP3]], <1 x i64> [[TMP4]], <2 x i32> <i32 0, i32 1>
// CHECK:   ret <2 x i64> [[VLD1Q_LANE]]
int64x2_t test_vld1q_lane_s64(int64_t const * a, int64x2_t b) {
  return vld1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vld1q_lane_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x half> [[TMP2]], half [[TMP4]], i32 7
// CHECK:   ret <8 x half> [[VLD1_LANE]]
float16x8_t test_vld1q_lane_f16(float16_t const * a, float16x8_t b) {
  return vld1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vld1q_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP4]], i32 3
// CHECK:   ret <4 x float> [[VLD1_LANE]]
float32x4_t test_vld1q_lane_f32(float32_t const * a, float32x4_t b) {
  return vld1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vld1q_lane_p8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <16 x i8> %b, i8 [[TMP0]], i32 15
// CHECK:   ret <16 x i8> [[VLD1_LANE]]
poly8x16_t test_vld1q_lane_p8(poly8_t const * a, poly8x16_t b) {
  return vld1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vld1q_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i16> [[TMP2]], i16 [[TMP4]], i32 7
// CHECK:   ret <8 x i16> [[VLD1_LANE]]
poly16x8_t test_vld1q_lane_p16(poly16_t const * a, poly16x8_t b) {
  return vld1q_lane_p16(a, b, 7);
}

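// vld1_lane: the d-register version of the single-lane load; every element
// type, including the 64-bit one, is a plain scalar load plus insertelement.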
4460 // CHECK-LABEL: @test_vld1_lane_u8(
4461 // CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
4462 // CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
4463 // CHECK:   ret <8 x i8> [[VLD1_LANE]]
test_vld1_lane_u8(uint8_t const * a,uint8x8_t b)4464 uint8x8_t test_vld1_lane_u8(uint8_t const * a, uint8x8_t b) {
4465   return vld1_lane_u8(a, b, 7);
4466 }
4467 
4468 // CHECK-LABEL: @test_vld1_lane_u16(
4469 // CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
4470 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
4471 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
4472 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
4473 // CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
4474 // CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
4475 // CHECK:   ret <4 x i16> [[VLD1_LANE]]
test_vld1_lane_u16(uint16_t const * a,uint16x4_t b)4476 uint16x4_t test_vld1_lane_u16(uint16_t const * a, uint16x4_t b) {
4477   return vld1_lane_u16(a, b, 3);
4478 }
4479 
4480 // CHECK-LABEL: @test_vld1_lane_u32(
4481 // CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
4482 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
4483 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
4484 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
4485 // CHECK:   [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK:   ret <2 x i32> [[VLD1_LANE]]
uint32x2_t test_vld1_lane_u32(uint32_t const * a, uint32x2_t b) {
  return vld1_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK:   ret <1 x i64> [[VLD1_LANE]]
uint64x1_t test_vld1_lane_u64(uint64_t const * a, uint64x1_t b) {
  return vld1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vld1_lane_s8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK:   ret <8 x i8> [[VLD1_LANE]]
int8x8_t test_vld1_lane_s8(int8_t const * a, int8x8_t b) {
  return vld1_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK:   ret <4 x i16> [[VLD1_LANE]]
int16x4_t test_vld1_lane_s16(int16_t const * a, int16x4_t b) {
  return vld1_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <2 x i32> [[TMP2]], i32 [[TMP4]], i32 1
// CHECK:   ret <2 x i32> [[VLD1_LANE]]
int32x2_t test_vld1_lane_s32(int32_t const * a, int32x2_t b) {
  return vld1_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   [[TMP4:%.*]] = load i64, i64* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <1 x i64> [[TMP2]], i64 [[TMP4]], i32 0
// CHECK:   ret <1 x i64> [[VLD1_LANE]]
int64x1_t test_vld1_lane_s64(int64_t const * a, int64x1_t b) {
  return vld1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vld1_lane_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   [[TMP4:%.*]] = load half, half* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x half> [[TMP2]], half [[TMP4]], i32 3
// CHECK:   ret <4 x half> [[VLD1_LANE]]
float16x4_t test_vld1_lane_f16(float16_t const * a, float16x4_t b) {
  return vld1_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld1_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   [[TMP4:%.*]] = load float, float* [[TMP3]], align 4
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP4]], i32 1
// CHECK:   ret <2 x float> [[VLD1_LANE]]
float32x2_t test_vld1_lane_f32(float32_t const * a, float32x2_t b) {
  return vld1_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld1_lane_p8(
// CHECK:   [[TMP0:%.*]] = load i8, i8* %a, align 1
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <8 x i8> %b, i8 [[TMP0]], i32 7
// CHECK:   ret <8 x i8> [[VLD1_LANE]]
poly8x8_t test_vld1_lane_p8(poly8_t const * a, poly8x8_t b) {
  return vld1_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld1_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   [[TMP4:%.*]] = load i16, i16* [[TMP3]], align 2
// CHECK:   [[VLD1_LANE:%.*]] = insertelement <4 x i16> [[TMP2]], i16 [[TMP4]], i32 3
// CHECK:   ret <4 x i16> [[VLD1_LANE]]
poly16x4_t test_vld1_lane_p16(poly16_t const * a, poly16x4_t b) {
  return vld1_lane_p16(a, b, 3);
}

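// The vld2/vld2q tests below exercise NEON's interleaved structure loads:
// each intrinsic reads 2*N contiguous elements and de-interleaves them into
// the two vectors of an <element>x<lanes>x2_t aggregate, returned through an
// alloca of the matching %struct type. A hypothetical use, splitting an
// interleaved stereo byte stream (illustrative only, not part of the test):
//   uint8x16x2_t ch = vld2q_u8(samples); /* ch.val[0] = even, ch.val[1] = odd */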
// CHECK-LABEL: @test_vld2q_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
uint8x16x2_t test_vld2q_u8(uint8_t const * a) {
  return vld2q_u8(a);
}

// CHECK-LABEL: @test_vld2q_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
uint16x8x2_t test_vld2q_u16(uint16_t const * a) {
  return vld2q_u16(a);
}

// CHECK-LABEL: @test_vld2q_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
uint32x4x2_t test_vld2q_u32(uint32_t const * a) {
  return vld2q_u32(a);
}

// CHECK-LABEL: @test_vld2q_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
int8x16x2_t test_vld2q_s8(int8_t const * a) {
  return vld2q_s8(a);
}

// CHECK-LABEL: @test_vld2q_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
int16x8x2_t test_vld2q_s16(int16_t const * a) {
  return vld2q_s16(a);
}

// CHECK-LABEL: @test_vld2q_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <4 x i32>, <4 x i32>
int32x4x2_t test_vld2q_s32(int32_t const * a) {
  return vld2q_s32(a);
}

// CHECK-LABEL: @test_vld2q_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x half>, <8 x half>
float16x8x2_t test_vld2q_f16(float16_t const * a) {
  return vld2q_f16(a);
}

// CHECK-LABEL: @test_vld2q_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <4 x float>, <4 x float>
float32x4x2_t test_vld2q_f32(float32_t const * a) {
  return vld2q_f32(a);
}

// CHECK-LABEL: @test_vld2q_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <16 x i8>, <16 x i8>
poly8x16x2_t test_vld2q_p8(poly8_t const * a) {
  return vld2q_p8(a);
}

// CHECK-LABEL: @test_vld2q_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2Q_V:%.*]] = call { <8 x i16>, <8 x i16>
poly16x8x2_t test_vld2q_p16(poly16_t const * a) {
  return vld2q_p16(a);
}

// CHECK-LABEL: @test_vld2_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
uint8x8x2_t test_vld2_u8(uint8_t const * a) {
  return vld2_u8(a);
}

// CHECK-LABEL: @test_vld2_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
uint16x4x2_t test_vld2_u16(uint16_t const * a) {
  return vld2_u16(a);
}

// CHECK-LABEL: @test_vld2_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
uint32x2x2_t test_vld2_u32(uint32_t const * a) {
  return vld2_u32(a);
}

// CHECK-LABEL: @test_vld2_u64(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
uint64x1x2_t test_vld2_u64(uint64_t const * a) {
  return vld2_u64(a);
}

// CHECK-LABEL: @test_vld2_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
int8x8x2_t test_vld2_s8(int8_t const * a) {
  return vld2_s8(a);
}

// CHECK-LABEL: @test_vld2_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
int16x4x2_t test_vld2_s16(int16_t const * a) {
  return vld2_s16(a);
}

// CHECK-LABEL: @test_vld2_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <2 x i32>, <2 x i32>
int32x2x2_t test_vld2_s32(int32_t const * a) {
  return vld2_s32(a);
}

// CHECK-LABEL: @test_vld2_s64(
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <1 x i64>, <1 x i64>
int64x1x2_t test_vld2_s64(int64_t const * a) {
  return vld2_s64(a);
}

// CHECK-LABEL: @test_vld2_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <4 x half>, <4 x half>
float16x4x2_t test_vld2_f16(float16_t const * a) {
  return vld2_f16(a);
}

// CHECK-LABEL: @test_vld2_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <2 x float>, <2 x float>
float32x2x2_t test_vld2_f32(float32_t const * a) {
  return vld2_f32(a);
}

// CHECK-LABEL: @test_vld2_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <8 x i8>, <8 x i8>
poly8x8x2_t test_vld2_p8(poly8_t const * a) {
  return vld2_p8(a);
}

// CHECK-LABEL: @test_vld2_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD2_V:%.*]] = call { <4 x i16>, <4 x i16>
poly16x4x2_t test_vld2_p16(poly16_t const * a) {
  return vld2_p16(a);
}

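// The lane-wise vld2q variants take the vector pair by value, so before the
// load itself these tests pin down Clang's ABI lowering: the coerced
// [4 x i64] argument is stored into the alloca for %b, memcpy'd into a local
// copy, and each constituent vector is loaded and round-tripped through
// <16 x i8> before being passed to the lane-load call together with a
// constant lane index (7 for <8 x i16>, 3 for <4 x i32>).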
// CHECK-LABEL: @test_vld2q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
uint16x8x2_t test_vld2q_lane_u16(uint16_t const * a, uint16x8x2_t b) {
  return vld2q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld2q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
uint32x4x2_t test_vld2q_lane_u32(uint32_t const * a, uint32x4x2_t b) {
  return vld2q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld2q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
int16x8x2_t test_vld2q_lane_s16(int16_t const * a, int16x8x2_t b) {
  return vld2q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld2q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>
int32x4x2_t test_vld2q_lane_s32(int32_t const * a, int32x4x2_t b) {
  return vld2q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vld2q_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>
float16x8x2_t test_vld2q_lane_f16(float16_t const * a, float16x8x2_t b) {
  return vld2q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vld2q_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>
float32x4x2_t test_vld2q_lane_f32(float32_t const * a, float32x4x2_t b) {
  return vld2q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vld2q_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x8x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[VLD2Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>
poly16x8x2_t test_vld2q_lane_p16(poly16_t const * a, poly16x8x2_t b) {
  return vld2q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
uint8x8x2_t test_vld2_lane_u8(uint8_t const * a, uint8x8x2_t b) {
  return vld2_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
uint16x4x2_t test_vld2_lane_u16(uint16_t const * a, uint16x4x2_t b) {
  return vld2_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld2_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
uint32x2x2_t test_vld2_lane_u32(uint32_t const * a, uint32x2x2_t b) {
  return vld2_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld2_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
int8x8x2_t test_vld2_lane_s8(int8_t const * a, int8x8x2_t b) {
  return vld2_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
int16x4x2_t test_vld2_lane_s16(int16_t const * a, int16x4x2_t b) {
  return vld2_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld2_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>
int32x2x2_t test_vld2_lane_s32(int32_t const * a, int32x2x2_t b) {
  return vld2_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld2_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x half>, <4 x half>
float16x4x2_t test_vld2_lane_f16(float16_t const * a, float16x4x2_t b) {
  return vld2_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld2_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x2x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <2 x float>, <2 x float>
float32x2x2_t test_vld2_lane_f32(float32_t const * a, float32x2x2_t b) {
  return vld2_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld2_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x2_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>
poly8x8x2_t test_vld2_lane_p8(poly8_t const * a, poly8x8x2_t b) {
  return vld2_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld2_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x4x2_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[VLD2_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>
poly16x4x2_t test_vld2_lane_p16(poly16_t const * a, poly16x4x2_t b) {
  return vld2_lane_p16(a, b, 3);
}

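// vld3/vld3q extend the same de-interleaving pattern to three-element
// structures; the CHECK lines here only constrain the aggregate alloca, the
// i8* bitcasts of destination and source, and the three-vector return type
// of the load, leaving the intrinsic's trailing operands unchecked.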
// CHECK-LABEL: @test_vld3q_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
uint8x16x3_t test_vld3q_u8(uint8_t const * a) {
  return vld3q_u8(a);
}

// CHECK-LABEL: @test_vld3q_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x3_t test_vld3q_u16(uint16_t const * a) {
  return vld3q_u16(a);
}

// CHECK-LABEL: @test_vld3q_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x3_t test_vld3q_u32(uint32_t const * a) {
  return vld3q_u32(a);
}

// CHECK-LABEL: @test_vld3q_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
int8x16x3_t test_vld3q_s8(int8_t const * a) {
  return vld3q_s8(a);
}

// CHECK-LABEL: @test_vld3q_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
int16x8x3_t test_vld3q_s16(int16_t const * a) {
  return vld3q_s16(a);
}

// CHECK-LABEL: @test_vld3q_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
int32x4x3_t test_vld3q_s32(int32_t const * a) {
  return vld3q_s32(a);
}

// CHECK-LABEL: @test_vld3q_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
float16x8x3_t test_vld3q_f16(float16_t const * a) {
  return vld3q_f16(a);
}

// CHECK-LABEL: @test_vld3q_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
float32x4x3_t test_vld3q_f32(float32_t const * a) {
  return vld3q_f32(a);
}

// CHECK-LABEL: @test_vld3q_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>
poly8x16x3_t test_vld3q_p8(poly8_t const * a) {
  return vld3q_p8(a);
}

// CHECK-LABEL: @test_vld3q_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
poly16x8x3_t test_vld3q_p16(poly16_t const * a) {
  return vld3q_p16(a);
}

// CHECK-LABEL: @test_vld3_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x3_t test_vld3_u8(uint8_t const * a) {
  return vld3_u8(a);
}

// CHECK-LABEL: @test_vld3_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x3_t test_vld3_u16(uint16_t const * a) {
  return vld3_u16(a);
}

// CHECK-LABEL: @test_vld3_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x3_t test_vld3_u32(uint32_t const * a) {
  return vld3_u32(a);
}

// CHECK-LABEL: @test_vld3_u64(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
uint64x1x3_t test_vld3_u64(uint64_t const * a) {
  return vld3_u64(a);
}

// CHECK-LABEL: @test_vld3_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
int8x8x3_t test_vld3_s8(int8_t const * a) {
  return vld3_s8(a);
}

// CHECK-LABEL: @test_vld3_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
int16x4x3_t test_vld3_s16(int16_t const * a) {
  return vld3_s16(a);
}

// CHECK-LABEL: @test_vld3_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
int32x2x3_t test_vld3_s32(int32_t const * a) {
  return vld3_s32(a);
}

// CHECK-LABEL: @test_vld3_s64(
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>
int64x1x3_t test_vld3_s64(int64_t const * a) {
  return vld3_s64(a);
}

// CHECK-LABEL: @test_vld3_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
float16x4x3_t test_vld3_f16(float16_t const * a) {
  return vld3_f16(a);
}

// CHECK-LABEL: @test_vld3_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
float32x2x3_t test_vld3_f32(float32_t const * a) {
  return vld3_f32(a);
}

// CHECK-LABEL: @test_vld3_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x3_t test_vld3_p8(poly8_t const * a) {
  return vld3_p8(a);
}

// CHECK-LABEL: @test_vld3_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD3_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x3_t test_vld3_p16(poly16_t const * a) {
  return vld3_p16(a);
}

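// As with vld2q_lane, the vld3q lane tests first verify the by-value
// aggregate coercion (a [6 x i64] store and a 48-byte memcpy for three q
// registers), then the extraction and <16 x i8> round-trip of all three
// source vectors ahead of the lane-load call.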
// CHECK-LABEL: @test_vld3q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x8x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x3_t test_vld3q_lane_u16(uint16_t const * a, uint16x8x3_t b) {
  return vld3q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld3q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5478 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
5479 // CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
test_vld3q_lane_u32(uint32_t const * a,uint32x4x3_t b)5480 uint32x4x3_t test_vld3q_lane_u32(uint32_t const * a, uint32x4x3_t b) {
5481   return vld3q_lane_u32(a, b, 3);
5482 }
5483 
5484 // CHECK-LABEL: @test_vld3q_lane_s16(
5485 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
5486 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
5487 // CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x3_t, align 16
5488 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
5489 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
5490 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5491 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
5492 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
5493 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5494 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x8x3_t* [[__RET]] to i8*
5495 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
5496 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
5497 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
5498 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5499 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5500 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
5501 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5502 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5503 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5504 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
5505 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
5506 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
5507 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5508 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5509 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5510 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5511 // CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
test_vld3q_lane_s16(int16_t const * a,int16x8x3_t b)5512 int16x8x3_t test_vld3q_lane_s16(int16_t const * a, int16x8x3_t b) {
5513   return vld3q_lane_s16(a, b, 7);
5514 }
5515 
5516 // CHECK-LABEL: @test_vld3q_lane_s32(
5517 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
5518 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
5519 // CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x3_t, align 16
5520 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
5521 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
5522 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5523 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
5524 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
5525 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5526 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x4x3_t* [[__RET]] to i8*
5527 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
5528 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
5529 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
5530 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
5531 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
5532 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
5533 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
5534 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
5535 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
5536 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
5537 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
5538 // CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
5539 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
5540 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
5541 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
5542 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
5543 // CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>
test_vld3q_lane_s32(int32_t const * a,int32x4x3_t b)5544 int32x4x3_t test_vld3q_lane_s32(int32_t const * a, int32x4x3_t b) {
5545   return vld3q_lane_s32(a, b, 3);
5546 }
5547 
5548 // CHECK-LABEL: @test_vld3q_lane_f16(
5549 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
5550 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
5551 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x3_t, align 16
5552 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
5553 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
5554 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5555 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
5556 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
5557 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5558 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x8x3_t* [[__RET]] to i8*
5559 // CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
5560 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
5561 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
5562 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
5563 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
5564 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
5565 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
5566 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
5567 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
5568 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
5569 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
5570 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
5571 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
5572 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
5573 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
5574 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
5575 // CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>
test_vld3q_lane_f16(float16_t const * a,float16x8x3_t b)5576 float16x8x3_t test_vld3q_lane_f16(float16_t const * a, float16x8x3_t b) {
5577   return vld3q_lane_f16(a, b, 7);
5578 }
5579 
5580 // CHECK-LABEL: @test_vld3q_lane_f32(
5581 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
5582 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
5583 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x3_t, align 16
5584 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
5585 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
5586 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5587 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
5588 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
5589 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5590 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x4x3_t* [[__RET]] to i8*
5591 // CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
5592 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
5593 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
5594 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
5595 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
5596 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
5597 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
5598 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
5599 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
5600 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
5601 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
5602 // CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
5603 // CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
5604 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
5605 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
5606 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
5607 // CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>
test_vld3q_lane_f32(float32_t const * a,float32x4x3_t b)5608 float32x4x3_t test_vld3q_lane_f32(float32_t const * a, float32x4x3_t b) {
5609   return vld3q_lane_f32(a, b, 3);
5610 }
5611 
5612 // CHECK-LABEL: @test_vld3q_lane_p16(
5613 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
5614 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
5615 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x3_t, align 16
5616 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
5617 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
5618 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
5619 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
5620 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
5621 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
5622 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x8x3_t* [[__RET]] to i8*
5623 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
5624 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
5625 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
5626 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
5627 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
5628 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
5629 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
5630 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
5631 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
5632 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
5633 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
5634 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
5635 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
5636 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
5637 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
5638 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
5639 // CHECK:   [[VLD3Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>
test_vld3q_lane_p16(poly16_t const * a,poly16x8x3_t b)5640 poly16x8x3_t test_vld3q_lane_p16(poly16_t const * a, poly16x8x3_t b) {
5641   return vld3q_lane_p16(a, b, 7);
5642 }
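
// Usage sketch (not a checked test): the vld3q_lane variants reload only the
// requested lane of each of the three q-register vectors from three
// consecutive elements in memory; every other lane keeps its value from the
// struct passed in. With a hypothetical buffer and input b:
//
//   uint16_t buf[3] = {1, 2, 3};
//   uint16x8x3_t v = vld3q_lane_u16(buf, b, 7);
//   // lane 7 of v.val[0..2] is now {1, 2, 3}; lanes 0..6 come from b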

// CHECK-LABEL: @test_vld3_lane_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x3_t test_vld3_lane_u8(uint8_t const * a, uint8x8x3_t b) {
  return vld3_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vld3_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x3_t test_vld3_lane_u16(uint16_t const * a, uint16x4x3_t b) {
  return vld3_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld3_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x3_t test_vld3_lane_u32(uint32_t const * a, uint32x2x3_t b) {
  return vld3_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld3_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
int8x8x3_t test_vld3_lane_s8(int8_t const * a, int8x8x3_t b) {
  return vld3_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld3_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
int16x4x3_t test_vld3_lane_s16(int16_t const * a, int16x4x3_t b) {
  return vld3_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld3_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>
int32x2x3_t test_vld3_lane_s32(int32_t const * a, int32x2x3_t b) {
  return vld3_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld3_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>
float16x4x3_t test_vld3_lane_f16(float16_t const * a, float16x4x3_t b) {
  return vld3_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld3_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x2x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>
float32x2x3_t test_vld3_lane_f32(float32_t const * a, float32x2x3_t b) {
  return vld3_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld3_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x3_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x3_t test_vld3_lane_p8(poly8_t const * a, poly8x8x3_t b) {
  return vld3_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld3_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x4x3_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[VLD3_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x3_t test_vld3_lane_p16(poly16_t const * a, poly16x4x3_t b) {
  return vld3_lane_p16(a, b, 3);
}
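
// Usage sketch (not a checked test): the d-register vld3_lane forms behave the
// same way on 64-bit vectors, so the lane index is bounded by the narrower
// vector width (0..3 for <4 x i16>, 0..1 for <2 x i32>). With a hypothetical
// int16x4x3_t b:
//
//   int16_t buf[3] = {1, 2, 3};
//   int16x4x3_t v = vld3_lane_s16(buf, b, 3);  // only lane 3 is reloaded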

// CHECK-LABEL: @test_vld4q_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
uint8x16x4_t test_vld4q_u8(uint8_t const * a) {
  return vld4q_u8(a);
}

// CHECK-LABEL: @test_vld4q_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x4_t test_vld4q_u16(uint16_t const * a) {
  return vld4q_u16(a);
}

// CHECK-LABEL: @test_vld4q_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x4_t test_vld4q_u32(uint32_t const * a) {
  return vld4q_u32(a);
}

// CHECK-LABEL: @test_vld4q_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
int8x16x4_t test_vld4q_s8(int8_t const * a) {
  return vld4q_s8(a);
}

// CHECK-LABEL: @test_vld4q_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
int16x8x4_t test_vld4q_s16(int16_t const * a) {
  return vld4q_s16(a);
}

// CHECK-LABEL: @test_vld4q_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
int32x4x4_t test_vld4q_s32(int32_t const * a) {
  return vld4q_s32(a);
}

// CHECK-LABEL: @test_vld4q_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
float16x8x4_t test_vld4q_f16(float16_t const * a) {
  return vld4q_f16(a);
}

// CHECK-LABEL: @test_vld4q_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
float32x4x4_t test_vld4q_f32(float32_t const * a) {
  return vld4q_f32(a);
}

// CHECK-LABEL: @test_vld4q_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>
poly8x16x4_t test_vld4q_p8(poly8_t const * a) {
  return vld4q_p8(a);
}

// CHECK-LABEL: @test_vld4q_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4Q_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
poly16x8x4_t test_vld4q_p16(poly16_t const * a) {
  return vld4q_p16(a);
}
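
// Usage sketch (not a checked test): vld4q loads 4*N consecutive elements and
// de-interleaves them into four q-register vectors, val[k][i] = buf[4*i + k].
// For a hypothetical 16-element buffer:
//
//   uint32_t buf[16];
//   uint32x4x4_t v = vld4q_u32(buf);
//   // v.val[k] = {buf[k], buf[k+4], buf[k+8], buf[k+12]}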

// CHECK-LABEL: @test_vld4_u8(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
uint8x8x4_t test_vld4_u8(uint8_t const * a) {
  return vld4_u8(a);
}

// CHECK-LABEL: @test_vld4_u16(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x4_t test_vld4_u16(uint16_t const * a) {
  return vld4_u16(a);
}

// CHECK-LABEL: @test_vld4_u32(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x4_t test_vld4_u32(uint32_t const * a) {
  return vld4_u32(a);
}

// CHECK-LABEL: @test_vld4_u64(
// CHECK:   [[__RET:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
uint64x1x4_t test_vld4_u64(uint64_t const * a) {
  return vld4_u64(a);
}

// CHECK-LABEL: @test_vld4_s8(
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
int8x8x4_t test_vld4_s8(int8_t const * a) {
  return vld4_s8(a);
}

// CHECK-LABEL: @test_vld4_s16(
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
int16x4x4_t test_vld4_s16(int16_t const * a) {
  return vld4_s16(a);
}

// CHECK-LABEL: @test_vld4_s32(
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
int32x2x4_t test_vld4_s32(int32_t const * a) {
  return vld4_s32(a);
}

// CHECK-LABEL: @test_vld4_s64(
// CHECK:   [[__RET:%.*]] = alloca %struct.int64x1x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int64x1x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64>
int64x1x4_t test_vld4_s64(int64_t const * a) {
  return vld4_s64(a);
}

// CHECK-LABEL: @test_vld4_f16(
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
float16x4x4_t test_vld4_f16(float16_t const * a) {
  return vld4_f16(a);
}

// CHECK-LABEL: @test_vld4_f32(
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
float32x2x4_t test_vld4_f32(float32_t const * a) {
  return vld4_f32(a);
}

// CHECK-LABEL: @test_vld4_p8(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x4_t test_vld4_p8(poly8_t const * a) {
  return vld4_p8(a);
}

// CHECK-LABEL: @test_vld4_p16(
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VLD4_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x4_t test_vld4_p16(poly16_t const * a) {
  return vld4_p16(a);
}
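
// Usage sketch (not a checked test): the d-register vld4 forms are identical
// except that each of the four result vectors is 64 bits wide:
//
//   uint8_t buf[32];
//   uint8x8x4_t v = vld4_u8(buf);  // v.val[k][i] = buf[4*i + k], i = 0..7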

// CHECK-LABEL: @test_vld4q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
uint16x8x4_t test_vld4q_lane_u16(uint16_t const * a, uint16x8x4_t b) {
  return vld4q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vld4q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
// CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
// CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
uint32x4x4_t test_vld4q_lane_u32(uint32_t const * a, uint32x4x4_t b) {
  return vld4q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vld4q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x8x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
// CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
int16x8x4_t test_vld4q_lane_s16(int16_t const * a, int16x8x4_t b) {
  return vld4q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vld4q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x4x4_t, align 16
6250 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
6251 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
6252 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6253 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
6254 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
6255 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6256 // CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x4x4_t* [[__RET]] to i8*
6257 // CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
6258 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6259 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
6260 // CHECK:   [[TMP5:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
6261 // CHECK:   [[TMP6:%.*]] = bitcast <4 x i32> [[TMP5]] to <16 x i8>
6262 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6263 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
6264 // CHECK:   [[TMP7:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
6265 // CHECK:   [[TMP8:%.*]] = bitcast <4 x i32> [[TMP7]] to <16 x i8>
6266 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6267 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
6268 // CHECK:   [[TMP9:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
6269 // CHECK:   [[TMP10:%.*]] = bitcast <4 x i32> [[TMP9]] to <16 x i8>
6270 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
6271 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
6272 // CHECK:   [[TMP11:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
6273 // CHECK:   [[TMP12:%.*]] = bitcast <4 x i32> [[TMP11]] to <16 x i8>
6274 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x i32>
6275 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x i32>
6276 // CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x i32>
6277 // CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x i32>
6278 // CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>
test_vld4q_lane_s32(int32_t const * a,int32x4x4_t b)6279 int32x4x4_t test_vld4q_lane_s32(int32_t const * a, int32x4x4_t b) {
6280   return vld4q_lane_s32(a, b, 3);
6281 }
6282 
6283 // CHECK-LABEL: @test_vld4q_lane_f16(
6284 // CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
6285 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
6286 // CHECK:   [[__RET:%.*]] = alloca %struct.float16x8x4_t, align 16
6287 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
6288 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
6289 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6290 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
6291 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
6292 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6293 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x8x4_t* [[__RET]] to i8*
6294 // CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
6295 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6296 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
6297 // CHECK:   [[TMP5:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
6298 // CHECK:   [[TMP6:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8>
6299 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6300 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
6301 // CHECK:   [[TMP7:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
6302 // CHECK:   [[TMP8:%.*]] = bitcast <8 x half> [[TMP7]] to <16 x i8>
6303 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6304 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
6305 // CHECK:   [[TMP9:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
6306 // CHECK:   [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <16 x i8>
6307 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
6308 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
6309 // CHECK:   [[TMP11:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
6310 // CHECK:   [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <16 x i8>
6311 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x half>
6312 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x half>
6313 // CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x half>
6314 // CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x half>
6315 // CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x half>, <8 x half>, <8 x half>, <8 x half>
test_vld4q_lane_f16(float16_t const * a,float16x8x4_t b)6316 float16x8x4_t test_vld4q_lane_f16(float16_t const * a, float16x8x4_t b) {
6317   return vld4q_lane_f16(a, b, 7);
6318 }
6319 
6320 // CHECK-LABEL: @test_vld4q_lane_f32(
6321 // CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
6322 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
6323 // CHECK:   [[__RET:%.*]] = alloca %struct.float32x4x4_t, align 16
6324 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
6325 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
6326 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6327 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
6328 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
6329 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6330 // CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x4x4_t* [[__RET]] to i8*
6331 // CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
6332 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6333 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
6334 // CHECK:   [[TMP5:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
6335 // CHECK:   [[TMP6:%.*]] = bitcast <4 x float> [[TMP5]] to <16 x i8>
6336 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6337 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
6338 // CHECK:   [[TMP7:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
6339 // CHECK:   [[TMP8:%.*]] = bitcast <4 x float> [[TMP7]] to <16 x i8>
6340 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6341 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
6342 // CHECK:   [[TMP9:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
6343 // CHECK:   [[TMP10:%.*]] = bitcast <4 x float> [[TMP9]] to <16 x i8>
6344 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
6345 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
6346 // CHECK:   [[TMP11:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
6347 // CHECK:   [[TMP12:%.*]] = bitcast <4 x float> [[TMP11]] to <16 x i8>
6348 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <4 x float>
6349 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <4 x float>
6350 // CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <4 x float>
6351 // CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <4 x float>
6352 // CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <4 x float>, <4 x float>, <4 x float>, <4 x float>
test_vld4q_lane_f32(float32_t const * a,float32x4x4_t b)6353 float32x4x4_t test_vld4q_lane_f32(float32_t const * a, float32x4x4_t b) {
6354   return vld4q_lane_f32(a, b, 3);
6355 }
6356 
6357 // CHECK-LABEL: @test_vld4q_lane_p16(
6358 // CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
6359 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
6360 // CHECK:   [[__RET:%.*]] = alloca %struct.poly16x8x4_t, align 16
6361 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
6362 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
6363 // CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
6364 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
6365 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
6366 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
6367 // CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x8x4_t* [[__RET]] to i8*
6368 // CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
6369 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6370 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
6371 // CHECK:   [[TMP5:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
6372 // CHECK:   [[TMP6:%.*]] = bitcast <8 x i16> [[TMP5]] to <16 x i8>
6373 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6374 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
6375 // CHECK:   [[TMP7:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
6376 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i16> [[TMP7]] to <16 x i8>
6377 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6378 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
6379 // CHECK:   [[TMP9:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
6380 // CHECK:   [[TMP10:%.*]] = bitcast <8 x i16> [[TMP9]] to <16 x i8>
6381 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
6382 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
6383 // CHECK:   [[TMP11:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
6384 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i16> [[TMP11]] to <16 x i8>
6385 // CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16>
6386 // CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP8]] to <8 x i16>
6387 // CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP10]] to <8 x i16>
6388 // CHECK:   [[TMP16:%.*]] = bitcast <16 x i8> [[TMP12]] to <8 x i16>
6389 // CHECK:   [[VLD4Q_LANE_V:%.*]] = call { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16>
test_vld4q_lane_p16(poly16_t const * a,poly16x8x4_t b)6390 poly16x8x4_t test_vld4q_lane_p16(poly16_t const * a, poly16x8x4_t b) {
6391   return vld4q_lane_p16(a, b, 7);
6392 }
6393 
6394 // CHECK-LABEL: @test_vld4_lane_u8(
6395 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
6396 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
6397 // CHECK:   [[__RET:%.*]] = alloca %struct.uint8x8x4_t, align 8
6398 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
6399 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
6400 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
6401 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
6402 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
6403 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
6404 // CHECK:   [[TMP3:%.*]] = bitcast %struct.uint8x8x4_t* [[__RET]] to i8*
6405 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6406 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
6407 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
6408 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6409 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
6410 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
6411 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6412 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
6413 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
6414 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
6415 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
6416 // CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
6417 // CHECK:   [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
test_vld4_lane_u8(uint8_t const * a,uint8x8x4_t b)6418 uint8x8x4_t test_vld4_lane_u8(uint8_t const * a, uint8x8x4_t b) {
6419   return vld4_lane_u8(a, b, 7);
6420 }
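// Note: the 8-bit variant above needs no per-register bitcasts, since its
// elements already have the generic <8 x i8> shape; the loaded registers
// appear to feed the vld4lane call directly.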

// CHECK-LABEL: @test_vld4_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
uint16x4x4_t test_vld4_lane_u16(uint16_t const * a, uint16x4x4_t b) {
  return vld4_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vld4_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.uint32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
uint32x2x4_t test_vld4_lane_u32(uint32_t const * a, uint32x2x4_t b) {
  return vld4_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vld4_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
int8x8x4_t test_vld4_lane_s8(int8_t const * a, int8x8x4_t b) {
  return vld4_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vld4_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
int16x4x4_t test_vld4_lane_s16(int16_t const * a, int16x4x4_t b) {
  return vld4_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vld4_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.int32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x i32> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x i32> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x i32> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x i32>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x i32>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>
int32x2x4_t test_vld4_lane_s32(int32_t const * a, int32x2x4_t b) {
  return vld4_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vld4_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x half> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x half>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x half>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x half>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x half>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x half>, <4 x half>, <4 x half>, <4 x half>
float16x4x4_t test_vld4_lane_f16(float16_t const * a, float16x4x4_t b) {
  return vld4_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vld4_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.float32x2x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <2 x float> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <2 x float> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <2 x float> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <2 x float> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <2 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <2 x float>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <2 x float>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <2 x float>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <2 x float>, <2 x float>, <2 x float>, <2 x float>
float32x2x4_t test_vld4_lane_f32(float32_t const * a, float32x2x4_t b) {
  return vld4_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vld4_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly8x8x4_t* [[__RET]] to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>
poly8x8x4_t test_vld4_lane_p8(poly8_t const * a, poly8x8x4_t b) {
  return vld4_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vld4_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__RET:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast %struct.poly16x4x4_t* [[__RET]] to i8*
// CHECK:   [[TMP4:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP5:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP7:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP8:%.*]] = bitcast <4 x i16> [[TMP7]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP9:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP10:%.*]] = bitcast <4 x i16> [[TMP9]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP11:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP12:%.*]] = bitcast <4 x i16> [[TMP11]] to <8 x i8>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP8]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP10]] to <4 x i16>
// CHECK:   [[TMP16:%.*]] = bitcast <8 x i8> [[TMP12]] to <4 x i16>
// CHECK:   [[VLD4_LANE_V:%.*]] = call { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>
poly16x4x4_t test_vld4_lane_p16(poly16_t const * a, poly16x4x4_t b) {
  return vld4_lane_p16(a, b, 3);
}
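// The vld4(q)_lane tests above all share one lowering shape: the struct
// argument arrives coerced as an [N x i64] array and is spilled to [[B]],
// memcpy'd into the local [[__S1]], and its four vectors are loaded one by
// one, round-tripped through generic i8-vector bitcasts, and handed to the
// @llvm.arm.neon.vld4lane intrinsic together with the lane index. The lane
// index must stay below the lane count, hence 7, 3, or 1 depending on the
// element width.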

// CHECK-LABEL: @test_vmax_s8(
// CHECK:   [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_V_I]]
int8x8_t test_vmax_s8(int8x8_t a, int8x8_t b) {
  return vmax_s8(a, b);
}

// CHECK-LABEL: @test_vmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VMAX_V2_I]]
int16x4_t test_vmax_s16(int16x4_t a, int16x4_t b) {
  return vmax_s16(a, b);
}

// CHECK-LABEL: @test_vmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VMAX_V2_I]]
int32x2_t test_vmax_s32(int32x2_t a, int32x2_t b) {
  return vmax_s32(a, b);
}

// CHECK-LABEL: @test_vmax_u8(
// CHECK:   [[VMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMAX_V_I]]
uint8x8_t test_vmax_u8(uint8x8_t a, uint8x8_t b) {
  return vmax_u8(a, b);
}

// CHECK-LABEL: @test_vmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <4 x i16> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VMAX_V2_I]]
uint16x4_t test_vmax_u16(uint16x4_t a, uint16x4_t b) {
  return vmax_u16(a, b);
}

// CHECK-LABEL: @test_vmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <2 x i32> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VMAX_V2_I]]
uint32x2_t test_vmax_u32(uint32x2_t a, uint32x2_t b) {
  return vmax_u32(a, b);
}

// CHECK-LABEL: @test_vmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VMAX_V3_I:%.*]] = bitcast <2 x float> [[VMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VMAX_V2_I]]
float32x2_t test_vmax_f32(float32x2_t a, float32x2_t b) {
  return vmax_f32(a, b);
}

// CHECK-LABEL: @test_vmaxq_s8(
// CHECK:   [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAXQ_V_I]]
int8x16_t test_vmaxq_s8(int8x16_t a, int8x16_t b) {
  return vmaxq_s8(a, b);
}

// CHECK-LABEL: @test_vmaxq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VMAXQ_V2_I]]
int16x8_t test_vmaxq_s16(int16x8_t a, int16x8_t b) {
  return vmaxq_s16(a, b);
}

// CHECK-LABEL: @test_vmaxq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxs.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VMAXQ_V2_I]]
int32x4_t test_vmaxq_s32(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}

// CHECK-LABEL: @test_vmaxq_u8(
// CHECK:   [[VMAXQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMAXQ_V_I]]
uint8x16_t test_vmaxq_u8(uint8x16_t a, uint8x16_t b) {
  return vmaxq_u8(a, b);
}

// CHECK-LABEL: @test_vmaxq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <8 x i16> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VMAXQ_V2_I]]
uint16x8_t test_vmaxq_u16(uint16x8_t a, uint16x8_t b) {
  return vmaxq_u16(a, b);
}

// CHECK-LABEL: @test_vmaxq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <4 x i32> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VMAXQ_V2_I]]
uint32x4_t test_vmaxq_u32(uint32x4_t a, uint32x4_t b) {
  return vmaxq_u32(a, b);
}

// CHECK-LABEL: @test_vmaxq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMAXQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VMAXQ_V3_I:%.*]] = bitcast <4 x float> [[VMAXQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VMAXQ_V2_I]]
float32x4_t test_vmaxq_f32(float32x4_t a, float32x4_t b) {
  return vmaxq_f32(a, b);
}
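// vmax maps straight onto @llvm.arm.neon.vmaxs / @llvm.arm.neon.vmaxu (the
// f32 form reuses the "s" intrinsic). The unused [[TMP0]]/[[TMP1]] bitcasts
// appear to be left over from the generic builtin coercion and survive only
// because no dead-code elimination is run on this output. vmin below mirrors
// the pattern with @llvm.arm.neon.vmins / @llvm.arm.neon.vminu.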
6864 
6865 // CHECK-LABEL: @test_vmin_s8(
6866 // CHECK:   [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmins.v8i8(<8 x i8> %a, <8 x i8> %b)
6867 // CHECK:   ret <8 x i8> [[VMIN_V_I]]
test_vmin_s8(int8x8_t a,int8x8_t b)6868 int8x8_t test_vmin_s8(int8x8_t a, int8x8_t b) {
6869   return vmin_s8(a, b);
6870 }
6871 
6872 // CHECK-LABEL: @test_vmin_s16(
6873 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6874 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6875 // CHECK:   [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vmins.v4i16(<4 x i16> %a, <4 x i16> %b)
6876 // CHECK:   [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
6877 // CHECK:   ret <4 x i16> [[VMIN_V2_I]]
test_vmin_s16(int16x4_t a,int16x4_t b)6878 int16x4_t test_vmin_s16(int16x4_t a, int16x4_t b) {
6879   return vmin_s16(a, b);
6880 }
6881 
6882 // CHECK-LABEL: @test_vmin_s32(
6883 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
6884 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
6885 // CHECK:   [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vmins.v2i32(<2 x i32> %a, <2 x i32> %b)
6886 // CHECK:   [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
6887 // CHECK:   ret <2 x i32> [[VMIN_V2_I]]
test_vmin_s32(int32x2_t a,int32x2_t b)6888 int32x2_t test_vmin_s32(int32x2_t a, int32x2_t b) {
6889   return vmin_s32(a, b);
6890 }
6891 
6892 // CHECK-LABEL: @test_vmin_u8(
6893 // CHECK:   [[VMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vminu.v8i8(<8 x i8> %a, <8 x i8> %b)
6894 // CHECK:   ret <8 x i8> [[VMIN_V_I]]
test_vmin_u8(uint8x8_t a,uint8x8_t b)6895 uint8x8_t test_vmin_u8(uint8x8_t a, uint8x8_t b) {
6896   return vmin_u8(a, b);
6897 }
6898 
6899 // CHECK-LABEL: @test_vmin_u16(
6900 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
6901 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
6902 // CHECK:   [[VMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16> %a, <4 x i16> %b)
6903 // CHECK:   [[VMIN_V3_I:%.*]] = bitcast <4 x i16> [[VMIN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VMIN_V2_I]]
uint16x4_t test_vmin_u16(uint16x4_t a, uint16x4_t b) {
  return vmin_u16(a, b);
}

// CHECK-LABEL: @test_vmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VMIN_V3_I:%.*]] = bitcast <2 x i32> [[VMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VMIN_V2_I]]
uint32x2_t test_vmin_u32(uint32x2_t a, uint32x2_t b) {
  return vmin_u32(a, b);
}

// CHECK-LABEL: @test_vmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VMIN_V3_I:%.*]] = bitcast <2 x float> [[VMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VMIN_V2_I]]
float32x2_t test_vmin_f32(float32x2_t a, float32x2_t b) {
  return vmin_f32(a, b);
}

// CHECK-LABEL: @test_vminq_s8(
// CHECK:   [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMINQ_V_I]]
int8x16_t test_vminq_s8(int8x16_t a, int8x16_t b) {
  return vminq_s8(a, b);
}

// CHECK-LABEL: @test_vminq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VMINQ_V2_I]]
int16x8_t test_vminq_s16(int16x8_t a, int16x8_t b) {
  return vminq_s16(a, b);
}

// CHECK-LABEL: @test_vminq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmins.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VMINQ_V2_I]]
int32x4_t test_vminq_s32(int32x4_t a, int32x4_t b) {
  return vminq_s32(a, b);
}

// CHECK-LABEL: @test_vminq_u8(
// CHECK:   [[VMINQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMINQ_V_I]]
uint8x16_t test_vminq_u8(uint8x16_t a, uint8x16_t b) {
  return vminq_u8(a, b);
}

// CHECK-LABEL: @test_vminq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <8 x i16> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VMINQ_V2_I]]
uint16x8_t test_vminq_u16(uint16x8_t a, uint16x8_t b) {
  return vminq_u16(a, b);
}

// CHECK-LABEL: @test_vminq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <4 x i32> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VMINQ_V2_I]]
uint32x4_t test_vminq_u32(uint32x4_t a, uint32x4_t b) {
  return vminq_u32(a, b);
}

// CHECK-LABEL: @test_vminq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VMINQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VMINQ_V3_I:%.*]] = bitcast <4 x float> [[VMINQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VMINQ_V2_I]]
float32x4_t test_vminq_f32(float32x4_t a, float32x4_t b) {
  return vminq_f32(a, b);
}

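// vmla/vmlaq (multiply-accumulate, a + b * c): no NEON intrinsic call is
// expected here; CodeGen lowers these to plain mul/add (fmul/fadd for the
// float variants).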
// CHECK-LABEL: @test_vmla_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
int8x8_t test_vmla_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmla_s8(a, b, c);
}

// CHECK-LABEL: @test_vmla_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_s16(a, b, c);
}

// CHECK-LABEL: @test_vmla_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_s32(a, b, c);
}

// CHECK-LABEL: @test_vmla_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_f32(a, b, c);
}

// CHECK-LABEL: @test_vmla_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <8 x i8> %a, [[MUL_I]]
// CHECK:   ret <8 x i8> [[ADD_I]]
uint8x8_t test_vmla_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmla_u8(a, b, c);
}

// CHECK-LABEL: @test_vmla_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_u16(a, b, c);
}

// CHECK-LABEL: @test_vmla_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
int8x16_t test_vmlaq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlaq_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlaq_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlaq_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlaq_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <16 x i8> %a, [[MUL_I]]
// CHECK:   ret <16 x i8> [[ADD_I]]
uint8x16_t test_vmlaq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlaq_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlaq_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlaq_u32(a, b, c);
}

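// vmlal (widening multiply-accumulate): the product is formed by the
// @llvm.arm.neon.vmulls/vmullu intrinsics and then added to the
// double-width accumulator with a plain add.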
// CHECK-LABEL: @test_vmlal_s8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlal_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlal_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlal_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlal_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_u32(a, b, c);
}

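// vmlal_lane: the selected lane of c is splatted with a shufflevector
// before the widening multiply.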
// CHECK-LABEL: @test_vmlal_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK:   [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <4 x i32> [[ADD]]
int32x4_t test_vmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlal_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK:   [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <2 x i64> [[ADD]]
int64x2_t test_vmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlal_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK:   [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <4 x i32> [[ADD]]
uint32x4_t test_vmlal_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlal_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlal_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK:   [[ADD:%.*]] = add <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <2 x i64> [[ADD]]
uint64x2_t test_vmlal_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlal_lane_u32(a, b, c, 1);
}

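// vmlal_n: the scalar c is broadcast through a chain of insertelements,
// then handled like vmlal.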
// CHECK-LABEL: @test_vmlal_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlal_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
int64x2_t test_vmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlal_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlal_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlal_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlal_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlal_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[ADD_I:%.*]] = add <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[ADD_I]]
uint64x2_t test_vmlal_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlal_n_u32(a, b, c);
}

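// vmla_lane/vmlaq_lane: lane splat via shufflevector, then plain mul/add
// (fmul/fadd for the float variants).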
// CHECK-LABEL: @test_vmla_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i16> [[ADD]]
int16x4_t test_vmla_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmla_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmla_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x i32> [[ADD]]
int32x2_t test_vmla_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmla_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmla_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <4 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i16> [[ADD]]
uint16x4_t test_vmla_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmla_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmla_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <2 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x i32> [[ADD]]
uint32x2_t test_vmla_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmla_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmla_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = fadd <2 x float> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x float> [[ADD]]
float32x2_t test_vmla_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmla_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <8 x i16> [[ADD]]
int16x8_t test_vmlaq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlaq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlaq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i32> [[ADD]]
int32x4_t test_vmlaq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlaq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <8 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <8 x i16> [[ADD]]
uint16x8_t test_vmlaq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlaq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlaq_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = add <4 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i32> [[ADD]]
uint32x4_t test_vmlaq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlaq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlaq_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK:   [[ADD:%.*]] = fadd <4 x float> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x float> [[ADD]]
float32x4_t test_vmlaq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlaq_lane_f32(a, b, c, 1);
}

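// vmla_n/vmlaq_n: scalar broadcast via insertelements, then plain mul/add
// (fmul/fadd for the float variants).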
// CHECK-LABEL: @test_vmla_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
int16x4_t test_vmla_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
  return vmla_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
int32x2_t test_vmla_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmla_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = add <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[ADD_I]]
uint16x4_t test_vmla_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmla_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK:   [[ADD_I:%.*]] = add <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[ADD_I]]
uint32x2_t test_vmla_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmla_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmla_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK:   [[ADD_I:%.*]] = fadd <2 x float> %a, [[MUL_I]]
// CHECK:   ret <2 x float> [[ADD_I]]
float32x2_t test_vmla_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmla_n_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
int16x8_t test_vmlaq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlaq_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
int32x4_t test_vmlaq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlaq_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK:   [[ADD_I:%.*]] = add <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[ADD_I]]
uint16x8_t test_vmlaq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlaq_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = add <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[ADD_I]]
uint32x4_t test_vmlaq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlaq_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlaq_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK:   [[ADD_I:%.*]] = fadd <4 x float> %a, [[MUL_I]]
// CHECK:   ret <4 x float> [[ADD_I]]
float32x4_t test_vmlaq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlaq_n_f32(a, b, c);
}

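// vmls/vmlsq (multiply-subtract, a - b * c): lowered to mul/sub
// (fmul/fsub for the float variants).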
// CHECK-LABEL: @test_vmls_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vmls_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
  return vmls_s8(a, b, c);
}

// CHECK-LABEL: @test_vmls_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vmls_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_s16(a, b, c);
}

// CHECK-LABEL: @test_vmls_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_s32(a, b, c);
}

// CHECK-LABEL: @test_vmls_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, %c
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_f32(a, b, c);
}

// CHECK-LABEL: @test_vmls_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %a, [[MUL_I]]
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vmls_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmls_u8(a, b, c);
}

// CHECK-LABEL: @test_vmls_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_u16(a, b, c);
}

// CHECK-LABEL: @test_vmls_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vmlsq_s8(int8x16_t a, int8x16_t b, int8x16_t c) {
  return vmlsq_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
  return vmlsq_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
  return vmlsq_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, %c
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_f32(float32x4_t a, float32x4_t b, float32x4_t c) {
  return vmlsq_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %a, [[MUL_I]]
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vmlsq_u8(uint8x16_t a, uint8x16_t b, uint8x16_t c) {
  return vmlsq_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_u16(uint16x8_t a, uint16x8_t b, uint16x8_t c) {
  return vmlsq_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, %c
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_u32(uint32x4_t a, uint32x4_t b, uint32x4_t c) {
  return vmlsq_u32(a, b, c);
}

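// vmlsl: widening multiply via vmulls/vmullu, subtracted from the
// double-width accumulator.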
// CHECK-LABEL: @test_vmlsl_s8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsl_s8(int16x8_t a, int8x8_t b, int8x8_t c) {
  return vmlsl_s8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u8(
// CHECK:   [[VMULL_I_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %b, <8 x i8> %c)
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMULL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsl_u8(uint16x8_t a, uint8x8_t b, uint8x8_t c) {
  return vmlsl_u8(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_u32(a, b, c);
}

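// vmlsl_lane: lane splat via shufflevector, then widening multiply and sub.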
// CHECK-LABEL: @test_vmlsl_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK:   [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <4 x i32> [[SUB]]
int32x4_t test_vmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vmlsl_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsl_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK:   [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <2 x i64> [[SUB]]
int64x2_t test_vmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vmlsl_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsl_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK:   [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsl_lane_u16(uint32x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmlsl_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsl_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK:   [[SUB:%.*]] = sub <2 x i64> [[A:%.*]], [[VMULL2_I]]
// CHECK:   ret <2 x i64> [[SUB]]
uint64x2_t test_vmlsl_lane_u32(uint64x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmlsl_lane_u32(a, b, c, 1);
}

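// vmlsl_n: scalar broadcast via insertelements, then widening multiply and sub.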
// CHECK-LABEL: @test_vmlsl_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vmlsl_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vmlsl_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMULL2_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsl_n_u16(uint32x4_t a, uint16x4_t b, uint16_t c) {
  return vmlsl_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsl_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL2_I_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMULL2_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vmlsl_n_u32(uint64x2_t a, uint32x2_t b, uint32_t c) {
  return vmlsl_n_u32(a, b, c);
}

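// vmls_lane/vmlsq_lane: lane splat via shufflevector, then mul/sub
// (fmul/fsub for the float variants).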
// CHECK-LABEL: @test_vmls_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i16> [[SUB]]
int16x4_t test_vmls_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
  return vmls_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmls_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x i32> [[SUB]]
int32x2_t test_vmls_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
  return vmls_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmls_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <4 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i16> [[SUB]]
uint16x4_t test_vmls_lane_u16(uint16x4_t a, uint16x4_t b, uint16x4_t c) {
  return vmls_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmls_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <2 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x i32> [[SUB]]
uint32x2_t test_vmls_lane_u32(uint32x2_t a, uint32x2_t b, uint32x2_t c) {
  return vmls_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmls_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <2 x float> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = fsub <2 x float> [[A:%.*]], [[MUL]]
// CHECK:   ret <2 x float> [[SUB]]
float32x2_t test_vmls_lane_f32(float32x2_t a, float32x2_t b, float32x2_t c) {
  return vmls_lane_f32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <8 x i16> [[SUB]]
int16x8_t test_vmlsq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
  return vmlsq_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i32> [[SUB]]
int32x4_t test_vmlsq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
  return vmlsq_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <8 x i16> [[A:%.*]], [[MUL]]
// CHECK:   ret <8 x i16> [[SUB]]
uint16x8_t test_vmlsq_lane_u16(uint16x8_t a, uint16x8_t b, uint16x4_t c) {
  return vmlsq_lane_u16(a, b, c, 3);
}

// CHECK-LABEL: @test_vmlsq_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = sub <4 x i32> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x i32> [[SUB]]
uint32x4_t test_vmlsq_lane_u32(uint32x4_t a, uint32x4_t b, uint32x2_t c) {
  return vmlsq_lane_u32(a, b, c, 1);
}

// CHECK-LABEL: @test_vmlsq_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <4 x float> [[B:%.*]], [[LANE]]
// CHECK:   [[SUB:%.*]] = fsub <4 x float> [[A:%.*]], [[MUL]]
// CHECK:   ret <4 x float> [[SUB]]
float32x4_t test_vmlsq_lane_f32(float32x4_t a, float32x4_t b, float32x2_t c) {
  return vmlsq_lane_f32(a, b, c, 1);
}

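// vmls_n/vmlsq_n: scalar broadcast via insertelements, then mul/sub
// (fmul/fsub for the float variants).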
7879 // CHECK-LABEL: @test_vmls_n_s16(
7880 // CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
7881 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
7882 // CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
7883 // CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
7884 // CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
7885 // CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
7886 // CHECK:   ret <4 x i16> [[SUB_I]]
test_vmls_n_s16(int16x4_t a,int16x4_t b,int16_t c)7887 int16x4_t test_vmls_n_s16(int16x4_t a, int16x4_t b, int16_t c) {
7888   return vmls_n_s16(a, b, c);
7889 }
7890 
7891 // CHECK-LABEL: @test_vmls_n_s32(
7892 // CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
7893 // CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
7894 // CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
7895 // CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vmls_n_s32(int32x2_t a, int32x2_t b, int32_t c) {
  return vmls_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmls_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %b, [[VECINIT3_I]]
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, [[MUL_I]]
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vmls_n_u16(uint16x4_t a, uint16x4_t b, uint16_t c) {
  return vmls_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmls_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %b, [[VECINIT1_I]]
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, [[MUL_I]]
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vmls_n_u32(uint32x2_t a, uint32x2_t b, uint32_t c) {
  return vmls_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmls_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %b, [[VECINIT1_I]]
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %a, [[MUL_I]]
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vmls_n_f32(float32x2_t a, float32x2_t b, float32_t c) {
  return vmls_n_f32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vmlsq_n_s16(int16x8_t a, int16x8_t b, int16_t c) {
  return vmlsq_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vmlsq_n_s32(int32x4_t a, int32x4_t b, int32_t c) {
  return vmlsq_n_s32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %c, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %c, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %c, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %c, i32 7
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %b, [[VECINIT7_I]]
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[MUL_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vmlsq_n_u16(uint16x8_t a, uint16x8_t b, uint16_t c) {
  return vmlsq_n_u16(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %c, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %b, [[VECINIT3_I]]
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[MUL_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vmlsq_n_u32(uint32x4_t a, uint32x4_t b, uint32_t c) {
  return vmlsq_n_u32(a, b, c);
}

// CHECK-LABEL: @test_vmlsq_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %c, i32 3
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %b, [[VECINIT3_I]]
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %a, [[MUL_I]]
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vmlsq_n_f32(float32x4_t a, float32x4_t b, float32_t c) {
  return vmlsq_n_f32(a, b, c);
}

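// vmovl_* lengthens each lane of a 64-bit vector into a 128-bit result;
// the signed variants should lower to a plain sext and the unsigned
// variants to a zext, as the checks below verify.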
// CHECK-LABEL: @test_vmovl_s8(
// CHECK:   [[VMOVL_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I]]
int16x8_t test_vmovl_s8(int8x8_t a) {
  return vmovl_s8(a);
}

// CHECK-LABEL: @test_vmovl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I]]
int32x4_t test_vmovl_s16(int16x4_t a) {
  return vmovl_s16(a);
}

// CHECK-LABEL: @test_vmovl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I]]
int64x2_t test_vmovl_s32(int32x2_t a) {
  return vmovl_s32(a);
}

// CHECK-LABEL: @test_vmovl_u8(
// CHECK:   [[VMOVL_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[VMOVL_I]]
uint16x8_t test_vmovl_u8(uint8x8_t a) {
  return vmovl_u8(a);
}

// CHECK-LABEL: @test_vmovl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[VMOVL_I]]
uint32x4_t test_vmovl_u16(uint16x4_t a) {
  return vmovl_u16(a);
}

// CHECK-LABEL: @test_vmovl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[VMOVL_I]]
uint64x2_t test_vmovl_u32(uint32x2_t a) {
  return vmovl_u32(a);
}

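// vmovn_* narrows each lane to half its width; signedness is irrelevant
// here, so every variant should lower to a plain trunc.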
// CHECK-LABEL: @test_vmovn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[VMOVN_I]]
int8x8_t test_vmovn_s16(int16x8_t a) {
  return vmovn_s16(a);
}

// CHECK-LABEL: @test_vmovn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[VMOVN_I]]
int16x4_t test_vmovn_s32(int32x4_t a) {
  return vmovn_s32(a);
}

// CHECK-LABEL: @test_vmovn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[VMOVN_I]]
int32x2_t test_vmovn_s64(int64x2_t a) {
  return vmovn_s64(a);
}

// CHECK-LABEL: @test_vmovn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VMOVN_I:%.*]] = trunc <8 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[VMOVN_I]]
uint8x8_t test_vmovn_u16(uint16x8_t a) {
  return vmovn_u16(a);
}

// CHECK-LABEL: @test_vmovn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VMOVN_I:%.*]] = trunc <4 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[VMOVN_I]]
uint16x4_t test_vmovn_u32(uint32x4_t a) {
  return vmovn_u32(a);
}

// CHECK-LABEL: @test_vmovn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VMOVN_I:%.*]] = trunc <2 x i64> %a to <2 x i32>
// CHECK:   ret <2 x i32> [[VMOVN_I]]
uint32x2_t test_vmovn_u64(uint64x2_t a) {
  return vmovn_u64(a);
}

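// vmov_n_*/vmovq_n_* splat a scalar across all lanes; the expected IR is
// a chain of insertelement instructions, one per lane of the result.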
// CHECK-LABEL: @test_vmov_n_u8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VECINIT7_I]]
uint8x8_t test_vmov_n_u8(uint8_t a) {
  return vmov_n_u8(a);
}

// CHECK-LABEL: @test_vmov_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VECINIT3_I]]
uint16x4_t test_vmov_n_u16(uint16_t a) {
  return vmov_n_u16(a);
}

// CHECK-LABEL: @test_vmov_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VECINIT1_I]]
uint32x2_t test_vmov_n_u32(uint32_t a) {
  return vmov_n_u32(a);
}

// CHECK-LABEL: @test_vmov_n_s8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VECINIT7_I]]
int8x8_t test_vmov_n_s8(int8_t a) {
  return vmov_n_s8(a);
}

// CHECK-LABEL: @test_vmov_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VECINIT3_I]]
int16x4_t test_vmov_n_s16(int16_t a) {
  return vmov_n_s16(a);
}

// CHECK-LABEL: @test_vmov_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VECINIT1_I]]
int32x2_t test_vmov_n_s32(int32_t a) {
  return vmov_n_s32(a);
}

// CHECK-LABEL: @test_vmov_n_p8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VECINIT7_I]]
poly8x8_t test_vmov_n_p8(poly8_t a) {
  return vmov_n_p8(a);
}

// CHECK-LABEL: @test_vmov_n_p16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VECINIT3_I]]
poly16x4_t test_vmov_n_p16(poly16_t a) {
  return vmov_n_p16(a);
}

// CHECK-LABEL: @test_vmov_n_f16(
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[TMP0]], i32 0
// CHECK:   [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK:   [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK:   [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK:   ret <4 x half> [[VECINIT3]]
float16x4_t test_vmov_n_f16(float16_t *a) {
  return vmov_n_f16(*a);
}

// CHECK-LABEL: @test_vmov_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %a, i32 1
// CHECK:   ret <2 x float> [[VECINIT1_I]]
float32x2_t test_vmov_n_f32(float32_t a) {
  return vmov_n_f32(a);
}

// CHECK-LABEL: @test_vmovq_n_u8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VECINIT15_I]]
uint8x16_t test_vmovq_n_u8(uint8_t a) {
  return vmovq_n_u8(a);
}

// CHECK-LABEL: @test_vmovq_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VECINIT7_I]]
uint16x8_t test_vmovq_n_u16(uint16_t a) {
  return vmovq_n_u16(a);
}

// CHECK-LABEL: @test_vmovq_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VECINIT3_I]]
uint32x4_t test_vmovq_n_u32(uint32_t a) {
  return vmovq_n_u32(a);
}

// CHECK-LABEL: @test_vmovq_n_s8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VECINIT15_I]]
int8x16_t test_vmovq_n_s8(int8_t a) {
  return vmovq_n_s8(a);
}

// CHECK-LABEL: @test_vmovq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VECINIT7_I]]
int16x8_t test_vmovq_n_s16(int16_t a) {
  return vmovq_n_s16(a);
}

// CHECK-LABEL: @test_vmovq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VECINIT3_I]]
int32x4_t test_vmovq_n_s32(int32_t a) {
  return vmovq_n_s32(a);
}

// CHECK-LABEL: @test_vmovq_n_p8(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <16 x i8> undef, i8 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <16 x i8> [[VECINIT_I]], i8 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <16 x i8> [[VECINIT1_I]], i8 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <16 x i8> [[VECINIT2_I]], i8 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <16 x i8> [[VECINIT3_I]], i8 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <16 x i8> [[VECINIT4_I]], i8 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <16 x i8> [[VECINIT5_I]], i8 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <16 x i8> [[VECINIT6_I]], i8 %a, i32 7
// CHECK:   [[VECINIT8_I:%.*]] = insertelement <16 x i8> [[VECINIT7_I]], i8 %a, i32 8
// CHECK:   [[VECINIT9_I:%.*]] = insertelement <16 x i8> [[VECINIT8_I]], i8 %a, i32 9
// CHECK:   [[VECINIT10_I:%.*]] = insertelement <16 x i8> [[VECINIT9_I]], i8 %a, i32 10
// CHECK:   [[VECINIT11_I:%.*]] = insertelement <16 x i8> [[VECINIT10_I]], i8 %a, i32 11
// CHECK:   [[VECINIT12_I:%.*]] = insertelement <16 x i8> [[VECINIT11_I]], i8 %a, i32 12
// CHECK:   [[VECINIT13_I:%.*]] = insertelement <16 x i8> [[VECINIT12_I]], i8 %a, i32 13
// CHECK:   [[VECINIT14_I:%.*]] = insertelement <16 x i8> [[VECINIT13_I]], i8 %a, i32 14
// CHECK:   [[VECINIT15_I:%.*]] = insertelement <16 x i8> [[VECINIT14_I]], i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VECINIT15_I]]
poly8x16_t test_vmovq_n_p8(poly8_t a) {
  return vmovq_n_p8(a);
}

// CHECK-LABEL: @test_vmovq_n_p16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %a, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %a, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %a, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %a, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VECINIT7_I]]
poly16x8_t test_vmovq_n_p16(poly16_t a) {
  return vmovq_n_p16(a);
}

// CHECK-LABEL: @test_vmovq_n_f16(
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[TMP0]], i32 0
// CHECK:   [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[TMP0]], i32 1
// CHECK:   [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[TMP0]], i32 2
// CHECK:   [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[TMP0]], i32 3
// CHECK:   [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[TMP0]], i32 4
// CHECK:   [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[TMP0]], i32 5
// CHECK:   [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[TMP0]], i32 6
// CHECK:   [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[TMP0]], i32 7
// CHECK:   ret <8 x half> [[VECINIT7]]
float16x8_t test_vmovq_n_f16(float16_t *a) {
  return vmovq_n_f16(*a);
}

// CHECK-LABEL: @test_vmovq_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %a, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %a, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %a, i32 3
// CHECK:   ret <4 x float> [[VECINIT3_I]]
float32x4_t test_vmovq_n_f32(float32_t a) {
  return vmovq_n_f32(a);
}

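// The single-lane 64-bit splats below are fed through vadd, presumably so
// that the splat is consumed by a real operation in the checked IR.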
// CHECK-LABEL: @test_vmov_n_s64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK:   ret <1 x i64> [[ADD_I]]
int64x1_t test_vmov_n_s64(int64_t a) {
  int64x1_t tmp = vmov_n_s64(a);
  return vadd_s64(tmp, tmp);
}

// CHECK-LABEL: @test_vmov_n_u64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
// CHECK:   [[ADD_I:%.*]] = add <1 x i64> [[VECINIT_I]], [[VECINIT_I]]
// CHECK:   ret <1 x i64> [[ADD_I]]
uint64x1_t test_vmov_n_u64(uint64_t a) {
  uint64x1_t tmp = vmov_n_u64(a);
  return vadd_u64(tmp, tmp);
}

// CHECK-LABEL: @test_vmovq_n_s64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VECINIT1_I]]
int64x2_t test_vmovq_n_s64(int64_t a) {
  return vmovq_n_s64(a);
}

// CHECK-LABEL: @test_vmovq_n_u64(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VECINIT1_I]]
uint64x2_t test_vmovq_n_u64(uint64_t a) {
  return vmovq_n_u64(a);
}

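// vmul_*/vmulq_* on the basic integer and floating-point types lower
// directly to the IR mul/fmul instructions.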
// CHECK-LABEL: @test_vmul_s8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[MUL_I]]
int8x8_t test_vmul_s8(int8x8_t a, int8x8_t b) {
  return vmul_s8(a, b);
}

// CHECK-LABEL: @test_vmul_s16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_s16(int16x4_t a, int16x4_t b) {
  return vmul_s16(a, b);
}

// CHECK-LABEL: @test_vmul_s32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_s32(int32x2_t a, int32x2_t b) {
  return vmul_s32(a, b);
}

// CHECK-LABEL: @test_vmul_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %a, %b
// CHECK:   ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_f32(float32x2_t a, float32x2_t b) {
  return vmul_f32(a, b);
}

// CHECK-LABEL: @test_vmul_u8(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[MUL_I]]
uint8x8_t test_vmul_u8(uint8x8_t a, uint8x8_t b) {
  return vmul_u8(a, b);
}

// CHECK-LABEL: @test_vmul_u16(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_u16(uint16x4_t a, uint16x4_t b) {
  return vmul_u16(a, b);
}

// CHECK-LABEL: @test_vmul_u32(
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_u32(uint32x2_t a, uint32x2_t b) {
  return vmul_u32(a, b);
}

// CHECK-LABEL: @test_vmulq_s8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[MUL_I]]
int8x16_t test_vmulq_s8(int8x16_t a, int8x16_t b) {
  return vmulq_s8(a, b);
}

// CHECK-LABEL: @test_vmulq_s16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_s16(int16x8_t a, int16x8_t b) {
  return vmulq_s16(a, b);
}

// CHECK-LABEL: @test_vmulq_s32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_s32(int32x4_t a, int32x4_t b) {
  return vmulq_s32(a, b);
}

// CHECK-LABEL: @test_vmulq_f32(
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %a, %b
// CHECK:   ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_f32(float32x4_t a, float32x4_t b) {
  return vmulq_f32(a, b);
}

// CHECK-LABEL: @test_vmulq_u8(
// CHECK:   [[MUL_I:%.*]] = mul <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[MUL_I]]
uint8x16_t test_vmulq_u8(uint8x16_t a, uint8x16_t b) {
  return vmulq_u8(a, b);
}

// CHECK-LABEL: @test_vmulq_u16(
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_u16(uint16x8_t a, uint16x8_t b) {
  return vmulq_u16(a, b);
}

// CHECK-LABEL: @test_vmulq_u32(
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_u32(uint32x4_t a, uint32x4_t b) {
  return vmulq_u32(a, b);
}

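// vmull_* is a widening multiply (64-bit operands, 128-bit result); it
// maps to the llvm.arm.neon.vmulls/vmullu intrinsics, and to vmullp for
// the polynomial variant.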
// CHECK-LABEL: @test_vmull_s8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmulls.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
int16x8_t test_vmull_s8(int8x8_t a, int8x8_t b) {
  return vmull_s8(a, b);
}

// CHECK-LABEL: @test_vmull_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_s16(int16x4_t a, int16x4_t b) {
  return vmull_s16(a, b);
}

// CHECK-LABEL: @test_vmull_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_s32(int32x2_t a, int32x2_t b) {
  return vmull_s32(a, b);
}

// CHECK-LABEL: @test_vmull_u8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullu.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
uint16x8_t test_vmull_u8(uint8x8_t a, uint8x8_t b) {
  return vmull_u8(a, b);
}

// CHECK-LABEL: @test_vmull_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_u16(a, b);
}

// CHECK-LABEL: @test_vmull_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_u32(a, b);
}

// CHECK-LABEL: @test_vmull_p8(
// CHECK:   [[VMULL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vmullp.v8i16(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i16> [[VMULL_I]]
poly16x8_t test_vmull_p8(poly8x8_t a, poly8x8_t b) {
  return vmull_p8(a, b);
}

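// The _lane variants splat the selected lane of the second operand with a
// shufflevector before the same widening multiply.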
// CHECK-LABEL: @test_vmull_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK:   ret <4 x i32> [[VMULL2_I]]
int32x4_t test_vmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vmull_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vmull_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK:   ret <2 x i64> [[VMULL2_I]]
int64x2_t test_vmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vmull_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vmull_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK:   ret <4 x i32> [[VMULL2_I]]
uint32x4_t test_vmull_lane_u16(uint16x4_t a, uint16x4_t b) {
  return vmull_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vmull_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VMULL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK:   ret <2 x i64> [[VMULL2_I]]
uint64x2_t test_vmull_lane_u32(uint32x2_t a, uint32x2_t b) {
  return vmull_lane_u32(a, b, 1);
}

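// The _n variants build the splat from a scalar with an insertelement
// chain instead of a lane shuffle.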
// CHECK-LABEL: @test_vmull_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK:   ret <4 x i32> [[VMULL5_I]]
int32x4_t test_vmull_n_s16(int16x4_t a, int16_t b) {
  return vmull_n_s16(a, b);
}

// CHECK-LABEL: @test_vmull_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmulls.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK:   ret <2 x i64> [[VMULL3_I]]
int64x2_t test_vmull_n_s32(int32x2_t a, int32_t b) {
  return vmull_n_s32(a, b);
}

// CHECK-LABEL: @test_vmull_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VMULL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK:   ret <4 x i32> [[VMULL5_I]]
uint32x4_t test_vmull_n_u16(uint16x4_t a, uint16_t b) {
  return vmull_n_u16(a, b);
}

// CHECK-LABEL: @test_vmull_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VMULL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vmullu.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK:   ret <2 x i64> [[VMULL3_I]]
uint64x2_t test_vmull_n_u32(uint32x2_t a, uint32_t b) {
  return vmull_n_u32(a, b);
}

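// Polynomial multiplication has no plain IR equivalent, so vmul_p8 and
// vmulq_p8 map to the llvm.arm.neon.vmulp intrinsic.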
// CHECK-LABEL: @test_vmul_p8(
// CHECK:   [[VMUL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vmulp.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VMUL_V_I]]
poly8x8_t test_vmul_p8(poly8x8_t a, poly8x8_t b) {
  return vmul_p8(a, b);
}

// CHECK-LABEL: @test_vmulq_p8(
// CHECK:   [[VMULQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vmulp.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VMULQ_V_I]]
poly8x16_t test_vmulq_p8(poly8x16_t a, poly8x16_t b) {
  return vmulq_p8(a, b);
}

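// Non-widening lane multiplies: splat the lane with a shufflevector, then
// use an ordinary mul/fmul.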
// CHECK-LABEL: @test_vmul_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
// CHECK:   ret <4 x i16> [[MUL]]
int16x4_t test_vmul_lane_s16(int16x4_t a, int16x4_t b) {
  return vmul_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vmul_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
// CHECK:   ret <2 x i32> [[MUL]]
int32x2_t test_vmul_lane_s32(int32x2_t a, int32x2_t b) {
  return vmul_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vmul_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <2 x float> [[A:%.*]], [[LANE]]
// CHECK:   ret <2 x float> [[MUL]]
float32x2_t test_vmul_lane_f32(float32x2_t a, float32x2_t b) {
  return vmul_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vmul_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <4 x i16> [[A:%.*]], [[LANE]]
// CHECK:   ret <4 x i16> [[MUL]]
uint16x4_t test_vmul_lane_u16(uint16x4_t a, uint16x4_t b) {
  return vmul_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vmul_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <2 x i32> [[A:%.*]], [[LANE]]
// CHECK:   ret <2 x i32> [[MUL]]
uint32x2_t test_vmul_lane_u32(uint32x2_t a, uint32x2_t b) {
  return vmul_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vmulq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
// CHECK:   ret <8 x i16> [[MUL]]
int16x8_t test_vmulq_lane_s16(int16x8_t a, int16x4_t b) {
  return vmulq_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vmulq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
// CHECK:   ret <4 x i32> [[MUL]]
int32x4_t test_vmulq_lane_s32(int32x4_t a, int32x2_t b) {
  return vmulq_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vmulq_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = fmul <4 x float> [[A:%.*]], [[LANE]]
// CHECK:   ret <4 x float> [[MUL]]
float32x4_t test_vmulq_lane_f32(float32x4_t a, float32x2_t b) {
  return vmulq_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vmulq_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[MUL:%.*]] = mul <8 x i16> [[A:%.*]], [[LANE]]
// CHECK:   ret <8 x i16> [[MUL]]
uint16x8_t test_vmulq_lane_u16(uint16x8_t a, uint16x4_t b) {
  return vmulq_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vmulq_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[MUL:%.*]] = mul <4 x i32> [[A:%.*]], [[LANE]]
// CHECK:   ret <4 x i32> [[MUL]]
uint32x4_t test_vmulq_lane_u32(uint32x4_t a, uint32x2_t b) {
  return vmulq_lane_u32(a, b, 1);
}

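// Non-widening _n multiplies: splat the scalar with an insertelement
// chain, then use an ordinary mul/fmul.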
// CHECK-LABEL: @test_vmul_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
// CHECK:   ret <4 x i16> [[MUL_I]]
int16x4_t test_vmul_n_s16(int16x4_t a, int16_t b) {
  return vmul_n_s16(a, b);
}

// CHECK-LABEL: @test_vmul_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
// CHECK:   ret <2 x i32> [[MUL_I]]
int32x2_t test_vmul_n_s32(int32x2_t a, int32_t b) {
  return vmul_n_s32(a, b);
}

// CHECK-LABEL: @test_vmul_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x float> undef, float %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x float> [[VECINIT_I]], float %b, i32 1
// CHECK:   [[MUL_I:%.*]] = fmul <2 x float> %a, [[VECINIT1_I]]
// CHECK:   ret <2 x float> [[MUL_I]]
float32x2_t test_vmul_n_f32(float32x2_t a, float32_t b) {
  return vmul_n_f32(a, b);
}

// CHECK-LABEL: @test_vmul_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i16> %a, [[VECINIT3_I]]
// CHECK:   ret <4 x i16> [[MUL_I]]
uint16x4_t test_vmul_n_u16(uint16x4_t a, uint16_t b) {
  return vmul_n_u16(a, b);
}

// CHECK-LABEL: @test_vmul_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[MUL_I:%.*]] = mul <2 x i32> %a, [[VECINIT1_I]]
// CHECK:   ret <2 x i32> [[MUL_I]]
uint32x2_t test_vmul_n_u32(uint32x2_t a, uint32_t b) {
  return vmul_n_u32(a, b);
}

// CHECK-LABEL: @test_vmulq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
// CHECK:   ret <8 x i16> [[MUL_I]]
int16x8_t test_vmulq_n_s16(int16x8_t a, int16_t b) {
  return vmulq_n_s16(a, b);
}

// CHECK-LABEL: @test_vmulq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
// CHECK:   ret <4 x i32> [[MUL_I]]
int32x4_t test_vmulq_n_s32(int32x4_t a, int32_t b) {
  return vmulq_n_s32(a, b);
}

// CHECK-LABEL: @test_vmulq_n_f32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x float> [[VECINIT_I]], float %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x float> [[VECINIT1_I]], float %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x float> [[VECINIT2_I]], float %b, i32 3
// CHECK:   [[MUL_I:%.*]] = fmul <4 x float> %a, [[VECINIT3_I]]
// CHECK:   ret <4 x float> [[MUL_I]]
float32x4_t test_vmulq_n_f32(float32x4_t a, float32_t b) {
  return vmulq_n_f32(a, b);
}

// CHECK-LABEL: @test_vmulq_n_u16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK:   [[MUL_I:%.*]] = mul <8 x i16> %a, [[VECINIT7_I]]
// CHECK:   ret <8 x i16> [[MUL_I]]
uint16x8_t test_vmulq_n_u16(uint16x8_t a, uint16_t b) {
  return vmulq_n_u16(a, b);
}

// CHECK-LABEL: @test_vmulq_n_u32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK:   [[MUL_I:%.*]] = mul <4 x i32> %a, [[VECINIT3_I]]
// CHECK:   ret <4 x i32> [[MUL_I]]
uint32x4_t test_vmulq_n_u32(uint32x4_t a, uint32_t b) {
  return vmulq_n_u32(a, b);
}

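// vmvn_* (bitwise NOT) lowers to an xor with an all-ones vector.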
// CHECK-LABEL: @test_vmvn_s8(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <8 x i8> [[NEG_I]]
int8x8_t test_vmvn_s8(int8x8_t a) {
  return vmvn_s8(a);
}

// CHECK-LABEL: @test_vmvn_s16(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   ret <4 x i16> [[NEG_I]]
int16x4_t test_vmvn_s16(int16x4_t a) {
  return vmvn_s16(a);
}

// CHECK-LABEL: @test_vmvn_s32(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
// CHECK:   ret <2 x i32> [[NEG_I]]
int32x2_t test_vmvn_s32(int32x2_t a) {
  return vmvn_s32(a);
}

// CHECK-LABEL: @test_vmvn_u8(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <8 x i8> [[NEG_I]]
uint8x8_t test_vmvn_u8(uint8x8_t a) {
  return vmvn_u8(a);
}

// CHECK-LABEL: @test_vmvn_u16(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   ret <4 x i16> [[NEG_I]]
uint16x4_t test_vmvn_u16(uint16x4_t a) {
  return vmvn_u16(a);
}

// CHECK-LABEL: @test_vmvn_u32(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %a, <i32 -1, i32 -1>
// CHECK:   ret <2 x i32> [[NEG_I]]
uint32x2_t test_vmvn_u32(uint32x2_t a) {
  return vmvn_u32(a);
}

// CHECK-LABEL: @test_vmvn_p8(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <8 x i8> [[NEG_I]]
poly8x8_t test_vmvn_p8(poly8x8_t a) {
  return vmvn_p8(a);
}

// CHECK-LABEL: @test_vmvnq_s8(
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <16 x i8> [[NEG_I]]
int8x16_t test_vmvnq_s8(int8x16_t a) {
  return vmvnq_s8(a);
}

// CHECK-LABEL: @test_vmvnq_s16(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   ret <8 x i16> [[NEG_I]]
int16x8_t test_vmvnq_s16(int16x8_t a) {
  return vmvnq_s16(a);
}

// CHECK-LABEL: @test_vmvnq_s32(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   ret <4 x i32> [[NEG_I]]
int32x4_t test_vmvnq_s32(int32x4_t a) {
  return vmvnq_s32(a);
}

// CHECK-LABEL: @test_vmvnq_u8(
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <16 x i8> [[NEG_I]]
uint8x16_t test_vmvnq_u8(uint8x16_t a) {
  return vmvnq_u8(a);
}

// CHECK-LABEL: @test_vmvnq_u16(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %a, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   ret <8 x i16> [[NEG_I]]
uint16x8_t test_vmvnq_u16(uint16x8_t a) {
  return vmvnq_u16(a);
}

// CHECK-LABEL: @test_vmvnq_u32(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %a, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   ret <4 x i32> [[NEG_I]]
uint32x4_t test_vmvnq_u32(uint32x4_t a) {
  return vmvnq_u32(a);
}

// CHECK-LABEL: @test_vmvnq_p8(
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   ret <16 x i8> [[NEG_I]]
poly8x16_t test_vmvnq_p8(poly8x16_t a) {
  return vmvnq_p8(a);
}

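// vneg_* lowers to a subtraction from zero for the integer types and to
// fneg for the floating-point ones.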
// CHECK-LABEL: @test_vneg_s8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, %a
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vneg_s8(int8x8_t a) {
  return vneg_s8(a);
}

// CHECK-LABEL: @test_vneg_s16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, %a
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vneg_s16(int16x4_t a) {
  return vneg_s16(a);
}

// CHECK-LABEL: @test_vneg_s32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, %a
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vneg_s32(int32x2_t a) {
  return vneg_s32(a);
}

// CHECK-LABEL: @test_vneg_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <2 x float> %a
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vneg_f32(float32x2_t a) {
  return vneg_f32(a);
}

// CHECK-LABEL: @test_vnegq_s8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> zeroinitializer, %a
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vnegq_s8(int8x16_t a) {
  return vnegq_s8(a);
}

// CHECK-LABEL: @test_vnegq_s16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> zeroinitializer, %a
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vnegq_s16(int16x8_t a) {
  return vnegq_s16(a);
}

// CHECK-LABEL: @test_vnegq_s32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> zeroinitializer, %a
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vnegq_s32(int32x4_t a) {
  return vnegq_s32(a);
}

// CHECK-LABEL: @test_vnegq_f32(
// CHECK:   [[SUB_I:%.*]] = fneg <4 x float> %a
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vnegq_f32(float32x4_t a) {
  return vnegq_f32(a);
}

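// vorn/vornq: bitwise OR-complement, computing a | ~b per lane. The
// complement shows up in the IR as an `xor` with an all-ones splat
// feeding the `or`.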
// CHECK-LABEL: @test_vorn_s8(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
// CHECK:   ret <8 x i8> [[OR_I]]
int8x8_t test_vorn_s8(int8x8_t a, int8x8_t b) {
  return vorn_s8(a, b);
}

// CHECK-LABEL: @test_vorn_s16(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
// CHECK:   ret <4 x i16> [[OR_I]]
int16x4_t test_vorn_s16(int16x4_t a, int16x4_t b) {
  return vorn_s16(a, b);
}

// CHECK-LABEL: @test_vorn_s32(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
// CHECK:   ret <2 x i32> [[OR_I]]
int32x2_t test_vorn_s32(int32x2_t a, int32x2_t b) {
  return vorn_s32(a, b);
}

// CHECK-LABEL: @test_vorn_s64(
// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
// CHECK:   ret <1 x i64> [[OR_I]]
int64x1_t test_vorn_s64(int64x1_t a, int64x1_t b) {
  return vorn_s64(a, b);
}

// CHECK-LABEL: @test_vorn_u8(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, [[NEG_I]]
// CHECK:   ret <8 x i8> [[OR_I]]
uint8x8_t test_vorn_u8(uint8x8_t a, uint8x8_t b) {
  return vorn_u8(a, b);
}

// CHECK-LABEL: @test_vorn_u16(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, [[NEG_I]]
// CHECK:   ret <4 x i16> [[OR_I]]
uint16x4_t test_vorn_u16(uint16x4_t a, uint16x4_t b) {
  return vorn_u16(a, b);
}

// CHECK-LABEL: @test_vorn_u32(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i32> %b, <i32 -1, i32 -1>
// CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, [[NEG_I]]
// CHECK:   ret <2 x i32> [[OR_I]]
uint32x2_t test_vorn_u32(uint32x2_t a, uint32x2_t b) {
  return vorn_u32(a, b);
}

// CHECK-LABEL: @test_vorn_u64(
// CHECK:   [[NEG_I:%.*]] = xor <1 x i64> %b, <i64 -1>
// CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, [[NEG_I]]
// CHECK:   ret <1 x i64> [[OR_I]]
uint64x1_t test_vorn_u64(uint64x1_t a, uint64x1_t b) {
  return vorn_u64(a, b);
}

// CHECK-LABEL: @test_vornq_s8(
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK:   ret <16 x i8> [[OR_I]]
int8x16_t test_vornq_s8(int8x16_t a, int8x16_t b) {
  return vornq_s8(a, b);
}

// CHECK-LABEL: @test_vornq_s16(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK:   ret <8 x i16> [[OR_I]]
int16x8_t test_vornq_s16(int16x8_t a, int16x8_t b) {
  return vornq_s16(a, b);
}

// CHECK-LABEL: @test_vornq_s32(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK:   ret <4 x i32> [[OR_I]]
int32x4_t test_vornq_s32(int32x4_t a, int32x4_t b) {
  return vornq_s32(a, b);
}

// CHECK-LABEL: @test_vornq_s64(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK:   ret <2 x i64> [[OR_I]]
int64x2_t test_vornq_s64(int64x2_t a, int64x2_t b) {
  return vornq_s64(a, b);
}

// CHECK-LABEL: @test_vornq_u8(
// CHECK:   [[NEG_I:%.*]] = xor <16 x i8> %b, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
// CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, [[NEG_I]]
// CHECK:   ret <16 x i8> [[OR_I]]
uint8x16_t test_vornq_u8(uint8x16_t a, uint8x16_t b) {
  return vornq_u8(a, b);
}

// CHECK-LABEL: @test_vornq_u16(
// CHECK:   [[NEG_I:%.*]] = xor <8 x i16> %b, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
// CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, [[NEG_I]]
// CHECK:   ret <8 x i16> [[OR_I]]
uint16x8_t test_vornq_u16(uint16x8_t a, uint16x8_t b) {
  return vornq_u16(a, b);
}

// CHECK-LABEL: @test_vornq_u32(
// CHECK:   [[NEG_I:%.*]] = xor <4 x i32> %b, <i32 -1, i32 -1, i32 -1, i32 -1>
// CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, [[NEG_I]]
// CHECK:   ret <4 x i32> [[OR_I]]
uint32x4_t test_vornq_u32(uint32x4_t a, uint32x4_t b) {
  return vornq_u32(a, b);
}

// CHECK-LABEL: @test_vornq_u64(
// CHECK:   [[NEG_I:%.*]] = xor <2 x i64> %b, <i64 -1, i64 -1>
// CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, [[NEG_I]]
// CHECK:   ret <2 x i64> [[OR_I]]
uint64x2_t test_vornq_u64(uint64x2_t a, uint64x2_t b) {
  return vornq_u64(a, b);
}

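// vorr/vorrq: plain bitwise OR, lowered directly to the LLVM `or`
// instruction on the vector type.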
// CHECK-LABEL: @test_vorr_s8(
// CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[OR_I]]
int8x8_t test_vorr_s8(int8x8_t a, int8x8_t b) {
  return vorr_s8(a, b);
}

// CHECK-LABEL: @test_vorr_s16(
// CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[OR_I]]
int16x4_t test_vorr_s16(int16x4_t a, int16x4_t b) {
  return vorr_s16(a, b);
}

// CHECK-LABEL: @test_vorr_s32(
// CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[OR_I]]
int32x2_t test_vorr_s32(int32x2_t a, int32x2_t b) {
  return vorr_s32(a, b);
}

// CHECK-LABEL: @test_vorr_s64(
// CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[OR_I]]
int64x1_t test_vorr_s64(int64x1_t a, int64x1_t b) {
  return vorr_s64(a, b);
}

// CHECK-LABEL: @test_vorr_u8(
// CHECK:   [[OR_I:%.*]] = or <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[OR_I]]
uint8x8_t test_vorr_u8(uint8x8_t a, uint8x8_t b) {
  return vorr_u8(a, b);
}

// CHECK-LABEL: @test_vorr_u16(
// CHECK:   [[OR_I:%.*]] = or <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[OR_I]]
uint16x4_t test_vorr_u16(uint16x4_t a, uint16x4_t b) {
  return vorr_u16(a, b);
}

// CHECK-LABEL: @test_vorr_u32(
// CHECK:   [[OR_I:%.*]] = or <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[OR_I]]
uint32x2_t test_vorr_u32(uint32x2_t a, uint32x2_t b) {
  return vorr_u32(a, b);
}

// CHECK-LABEL: @test_vorr_u64(
// CHECK:   [[OR_I:%.*]] = or <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[OR_I]]
uint64x1_t test_vorr_u64(uint64x1_t a, uint64x1_t b) {
  return vorr_u64(a, b);
}

// CHECK-LABEL: @test_vorrq_s8(
// CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[OR_I]]
int8x16_t test_vorrq_s8(int8x16_t a, int8x16_t b) {
  return vorrq_s8(a, b);
}

// CHECK-LABEL: @test_vorrq_s16(
// CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[OR_I]]
int16x8_t test_vorrq_s16(int16x8_t a, int16x8_t b) {
  return vorrq_s16(a, b);
}

// CHECK-LABEL: @test_vorrq_s32(
// CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[OR_I]]
int32x4_t test_vorrq_s32(int32x4_t a, int32x4_t b) {
  return vorrq_s32(a, b);
}

// CHECK-LABEL: @test_vorrq_s64(
// CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[OR_I]]
int64x2_t test_vorrq_s64(int64x2_t a, int64x2_t b) {
  return vorrq_s64(a, b);
}

// CHECK-LABEL: @test_vorrq_u8(
// CHECK:   [[OR_I:%.*]] = or <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[OR_I]]
uint8x16_t test_vorrq_u8(uint8x16_t a, uint8x16_t b) {
  return vorrq_u8(a, b);
}

// CHECK-LABEL: @test_vorrq_u16(
// CHECK:   [[OR_I:%.*]] = or <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[OR_I]]
uint16x8_t test_vorrq_u16(uint16x8_t a, uint16x8_t b) {
  return vorrq_u16(a, b);
}

// CHECK-LABEL: @test_vorrq_u32(
// CHECK:   [[OR_I:%.*]] = or <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[OR_I]]
uint32x4_t test_vorrq_u32(uint32x4_t a, uint32x4_t b) {
  return vorrq_u32(a, b);
}

// CHECK-LABEL: @test_vorrq_u64(
// CHECK:   [[OR_I:%.*]] = or <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[OR_I]]
uint64x2_t test_vorrq_u64(uint64x2_t a, uint64x2_t b) {
  return vorrq_u64(a, b);
}

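// vpadal/vpadalq: pairwise add and accumulate long. Adjacent pairs of the
// narrow operand b are summed into the wider accumulator a, e.g.
// vpadal_s8(a, b) yields { a0 + b0 + b1, a1 + b2 + b3, ... }; this maps to
// the @llvm.arm.neon.vpadals/vpadalu intrinsics checked below.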
// CHECK-LABEL: @test_vpadal_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadals.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
// CHECK:   ret <4 x i16> [[VPADAL_V1_I]]
int16x4_t test_vpadal_s8(int16x4_t a, int8x8_t b) {
  return vpadal_s8(a, b);
}

// CHECK-LABEL: @test_vpadal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadals.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
// CHECK:   ret <2 x i32> [[VPADAL_V2_I]]
int32x2_t test_vpadal_s16(int32x2_t a, int16x4_t b) {
  return vpadal_s16(a, b);
}

// CHECK-LABEL: @test_vpadal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadals.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
// CHECK:   ret <1 x i64> [[VPADAL_V2_I]]
int64x1_t test_vpadal_s32(int64x1_t a, int32x2_t b) {
  return vpadal_s32(a, b);
}

// CHECK-LABEL: @test_vpadal_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADAL_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadalu.v4i16.v8i8(<4 x i16> %a, <8 x i8> %b)
// CHECK:   ret <4 x i16> [[VPADAL_V1_I]]
uint16x4_t test_vpadal_u8(uint16x4_t a, uint8x8_t b) {
  return vpadal_u8(a, b);
}

// CHECK-LABEL: @test_vpadal_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADAL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadalu.v2i32.v4i16(<2 x i32> %a, <4 x i16> %b)
// CHECK:   ret <2 x i32> [[VPADAL_V2_I]]
uint32x2_t test_vpadal_u16(uint32x2_t a, uint16x4_t b) {
  return vpadal_u16(a, b);
}

// CHECK-LABEL: @test_vpadal_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADAL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpadalu.v1i64.v2i32(<1 x i64> %a, <2 x i32> %b)
// CHECK:   ret <1 x i64> [[VPADAL_V2_I]]
uint64x1_t test_vpadal_u32(uint64x1_t a, uint32x2_t b) {
  return vpadal_u32(a, b);
}

// CHECK-LABEL: @test_vpadalq_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadals.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK:   ret <8 x i16> [[VPADALQ_V1_I]]
int16x8_t test_vpadalq_s8(int16x8_t a, int8x16_t b) {
  return vpadalq_s8(a, b);
}

// CHECK-LABEL: @test_vpadalq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadals.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK:   ret <4 x i32> [[VPADALQ_V2_I]]
int32x4_t test_vpadalq_s16(int32x4_t a, int16x8_t b) {
  return vpadalq_s16(a, b);
}

// CHECK-LABEL: @test_vpadalq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadals.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK:   ret <2 x i64> [[VPADALQ_V2_I]]
int64x2_t test_vpadalq_s32(int64x2_t a, int32x4_t b) {
  return vpadalq_s32(a, b);
}

// CHECK-LABEL: @test_vpadalq_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADALQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpadalu.v8i16.v16i8(<8 x i16> %a, <16 x i8> %b)
// CHECK:   ret <8 x i16> [[VPADALQ_V1_I]]
uint16x8_t test_vpadalq_u8(uint16x8_t a, uint8x16_t b) {
  return vpadalq_u8(a, b);
}

// CHECK-LABEL: @test_vpadalq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VPADALQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpadalu.v4i32.v8i16(<4 x i32> %a, <8 x i16> %b)
// CHECK:   ret <4 x i32> [[VPADALQ_V2_I]]
uint32x4_t test_vpadalq_u16(uint32x4_t a, uint16x8_t b) {
  return vpadalq_u16(a, b);
}

// CHECK-LABEL: @test_vpadalq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VPADALQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpadalu.v2i64.v4i32(<2 x i64> %a, <4 x i32> %b)
// CHECK:   ret <2 x i64> [[VPADALQ_V2_I]]
uint64x2_t test_vpadalq_u32(uint64x2_t a, uint32x4_t b) {
  return vpadalq_u32(a, b);
}

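// vpadd: pairwise add of adjacent lanes across the two operands, via
// @llvm.arm.neon.vpadd. Note the result bitcast CodeGen emits
// ([[VPADD_V3_I]]) is not used by the `ret`; mem2reg only promotes
// allocas, so the dead cast survives into the checked output.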
// CHECK-LABEL: @test_vpadd_s8(
// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPADD_V_I]]
int8x8_t test_vpadd_s8(int8x8_t a, int8x8_t b) {
  return vpadd_s8(a, b);
}

// CHECK-LABEL: @test_vpadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPADD_V2_I]]
int16x4_t test_vpadd_s16(int16x4_t a, int16x4_t b) {
  return vpadd_s16(a, b);
}

// CHECK-LABEL: @test_vpadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPADD_V2_I]]
int32x2_t test_vpadd_s32(int32x2_t a, int32x2_t b) {
  return vpadd_s32(a, b);
}

// CHECK-LABEL: @test_vpadd_u8(
// CHECK:   [[VPADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPADD_V_I]]
uint8x8_t test_vpadd_u8(uint8x8_t a, uint8x8_t b) {
  return vpadd_u8(a, b);
}

// CHECK-LABEL: @test_vpadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <4 x i16> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPADD_V2_I]]
uint16x4_t test_vpadd_u16(uint16x4_t a, uint16x4_t b) {
  return vpadd_u16(a, b);
}

// CHECK-LABEL: @test_vpadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x i32> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPADD_V2_I]]
uint32x2_t test_vpadd_u32(uint32x2_t a, uint32x2_t b) {
  return vpadd_u32(a, b);
}

// CHECK-LABEL: @test_vpadd_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPADD_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VPADD_V3_I:%.*]] = bitcast <2 x float> [[VPADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VPADD_V2_I]]
float32x2_t test_vpadd_f32(float32x2_t a, float32x2_t b) {
  return vpadd_f32(a, b);
}

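// vpaddl/vpaddlq: pairwise add long. Adjacent lanes are summed into a
// vector with half the lane count and double the element width, e.g.
// vpaddl_s8 widens <8 x i8> to <4 x i16> via @llvm.arm.neon.vpaddls.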
// CHECK-LABEL: @test_vpaddl_s8(
// CHECK:   [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8> %a)
// CHECK:   ret <4 x i16> [[VPADDL_I]]
int16x4_t test_vpaddl_s8(int8x8_t a) {
  return vpaddl_s8(a);
}

// CHECK-LABEL: @test_vpaddl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16> %a)
// CHECK:   ret <2 x i32> [[VPADDL1_I]]
int32x2_t test_vpaddl_s16(int16x4_t a) {
  return vpaddl_s16(a);
}

// CHECK-LABEL: @test_vpaddl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32> %a)
// CHECK:   ret <1 x i64> [[VPADDL1_I]]
int64x1_t test_vpaddl_s32(int32x2_t a) {
  return vpaddl_s32(a);
}

// CHECK-LABEL: @test_vpaddl_u8(
// CHECK:   [[VPADDL_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpaddlu.v4i16.v8i8(<8 x i8> %a)
// CHECK:   ret <4 x i16> [[VPADDL_I]]
uint16x4_t test_vpaddl_u8(uint8x8_t a) {
  return vpaddl_u8(a);
}

// CHECK-LABEL: @test_vpaddl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpaddlu.v2i32.v4i16(<4 x i16> %a)
// CHECK:   ret <2 x i32> [[VPADDL1_I]]
uint32x2_t test_vpaddl_u16(uint16x4_t a) {
  return vpaddl_u16(a);
}

// CHECK-LABEL: @test_vpaddl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <1 x i64> @llvm.arm.neon.vpaddlu.v1i64.v2i32(<2 x i32> %a)
// CHECK:   ret <1 x i64> [[VPADDL1_I]]
uint64x1_t test_vpaddl_u32(uint32x2_t a) {
  return vpaddl_u32(a);
}

// CHECK-LABEL: @test_vpaddlq_s8(
// CHECK:   [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %a)
// CHECK:   ret <8 x i16> [[VPADDL_I]]
int16x8_t test_vpaddlq_s8(int8x16_t a) {
  return vpaddlq_s8(a);
}

// CHECK-LABEL: @test_vpaddlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddls.v4i32.v8i16(<8 x i16> %a)
// CHECK:   ret <4 x i32> [[VPADDL1_I]]
int32x4_t test_vpaddlq_s16(int16x8_t a) {
  return vpaddlq_s16(a);
}

// CHECK-LABEL: @test_vpaddlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddls.v2i64.v4i32(<4 x i32> %a)
// CHECK:   ret <2 x i64> [[VPADDL1_I]]
int64x2_t test_vpaddlq_s32(int32x4_t a) {
  return vpaddlq_s32(a);
}

// CHECK-LABEL: @test_vpaddlq_u8(
// CHECK:   [[VPADDL_I:%.*]] = call <8 x i16> @llvm.arm.neon.vpaddlu.v8i16.v16i8(<16 x i8> %a)
// CHECK:   ret <8 x i16> [[VPADDL_I]]
uint16x8_t test_vpaddlq_u8(uint8x16_t a) {
  return vpaddlq_u8(a);
}

// CHECK-LABEL: @test_vpaddlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vpaddlu.v4i32.v8i16(<8 x i16> %a)
// CHECK:   ret <4 x i32> [[VPADDL1_I]]
uint32x4_t test_vpaddlq_u16(uint16x8_t a) {
  return vpaddlq_u16(a);
}

// CHECK-LABEL: @test_vpaddlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VPADDL1_I:%.*]] = call <2 x i64> @llvm.arm.neon.vpaddlu.v2i64.v4i32(<4 x i32> %a)
// CHECK:   ret <2 x i64> [[VPADDL1_I]]
uint64x2_t test_vpaddlq_u32(uint32x4_t a) {
  return vpaddlq_u32(a);
}

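// vpmax: pairwise maximum, with distinct signed (vpmaxs), unsigned
// (vpmaxu) and float (vpmaxs on v2f32) intrinsic lowerings.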
// CHECK-LABEL: @test_vpmax_s8(
// CHECK:   [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxs.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_V_I]]
int8x8_t test_vpmax_s8(int8x8_t a, int8x8_t b) {
  return vpmax_s8(a, b);
}

// CHECK-LABEL: @test_vpmax_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxs.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPMAX_V2_I]]
int16x4_t test_vpmax_s16(int16x4_t a, int16x4_t b) {
  return vpmax_s16(a, b);
}

// CHECK-LABEL: @test_vpmax_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxs.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPMAX_V2_I]]
int32x2_t test_vpmax_s32(int32x2_t a, int32x2_t b) {
  return vpmax_s32(a, b);
}

// CHECK-LABEL: @test_vpmax_u8(
// CHECK:   [[VPMAX_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmaxu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMAX_V_I]]
uint8x8_t test_vpmax_u8(uint8x8_t a, uint8x8_t b) {
  return vpmax_u8(a, b);
}

// CHECK-LABEL: @test_vpmax_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <4 x i16> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPMAX_V2_I]]
uint16x4_t test_vpmax_u16(uint16x4_t a, uint16x4_t b) {
  return vpmax_u16(a, b);
}

// CHECK-LABEL: @test_vpmax_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <2 x i32> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPMAX_V2_I]]
uint32x2_t test_vpmax_u32(uint32x2_t a, uint32x2_t b) {
  return vpmax_u32(a, b);
}

// CHECK-LABEL: @test_vpmax_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMAX_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VPMAX_V3_I:%.*]] = bitcast <2 x float> [[VPMAX_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VPMAX_V2_I]]
float32x2_t test_vpmax_f32(float32x2_t a, float32x2_t b) {
  return vpmax_f32(a, b);
}

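// vpmin: pairwise minimum, mirroring vpmax with vpmins/vpminu.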
// CHECK-LABEL: @test_vpmin_s8(
// CHECK:   [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpmins.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_V_I]]
int8x8_t test_vpmin_s8(int8x8_t a, int8x8_t b) {
  return vpmin_s8(a, b);
}

// CHECK-LABEL: @test_vpmin_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpmins.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPMIN_V2_I]]
int16x4_t test_vpmin_s16(int16x4_t a, int16x4_t b) {
  return vpmin_s16(a, b);
}

// CHECK-LABEL: @test_vpmin_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpmins.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPMIN_V2_I]]
int32x2_t test_vpmin_s32(int32x2_t a, int32x2_t b) {
  return vpmin_s32(a, b);
}

// CHECK-LABEL: @test_vpmin_u8(
// CHECK:   [[VPMIN_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vpminu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VPMIN_V_I]]
uint8x8_t test_vpmin_u8(uint8x8_t a, uint8x8_t b) {
  return vpmin_u8(a, b);
}

// CHECK-LABEL: @test_vpmin_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <4 x i16> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VPMIN_V2_I]]
uint16x4_t test_vpmin_u16(uint16x4_t a, uint16x4_t b) {
  return vpmin_u16(a, b);
}

// CHECK-LABEL: @test_vpmin_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <2 x i32> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VPMIN_V2_I]]
uint32x2_t test_vpmin_u32(uint32x2_t a, uint32x2_t b) {
  return vpmin_u32(a, b);
}

// CHECK-LABEL: @test_vpmin_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VPMIN_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VPMIN_V3_I:%.*]] = bitcast <2 x float> [[VPMIN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VPMIN_V2_I]]
float32x2_t test_vpmin_f32(float32x2_t a, float32x2_t b) {
  return vpmin_f32(a, b);
}

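// vqabs/vqabsq: saturating absolute value (signed only), via
// @llvm.arm.neon.vqabs; unlike a plain abs, INT_MIN lanes saturate to
// INT_MAX instead of wrapping.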
// CHECK-LABEL: @test_vqabs_s8(
// CHECK:   [[VQABS_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqabs.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VQABS_V_I]]
int8x8_t test_vqabs_s8(int8x8_t a) {
  return vqabs_s8(a);
}

// CHECK-LABEL: @test_vqabs_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQABS_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqabs.v4i16(<4 x i16> %a)
// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <4 x i16> [[VQABS_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQABS_V1_I]]
int16x4_t test_vqabs_s16(int16x4_t a) {
  return vqabs_s16(a);
}

// CHECK-LABEL: @test_vqabs_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQABS_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqabs.v2i32(<2 x i32> %a)
// CHECK:   [[VQABS_V2_I:%.*]] = bitcast <2 x i32> [[VQABS_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQABS_V1_I]]
int32x2_t test_vqabs_s32(int32x2_t a) {
  return vqabs_s32(a);
}

// CHECK-LABEL: @test_vqabsq_s8(
// CHECK:   [[VQABSQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqabs.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VQABSQ_V_I]]
int8x16_t test_vqabsq_s8(int8x16_t a) {
  return vqabsq_s8(a);
}

// CHECK-LABEL: @test_vqabsq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQABSQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqabs.v8i16(<8 x i16> %a)
// CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <8 x i16> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQABSQ_V1_I]]
int16x8_t test_vqabsq_s16(int16x8_t a) {
  return vqabsq_s16(a);
}

// CHECK-LABEL: @test_vqabsq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQABSQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqabs.v4i32(<4 x i32> %a)
// CHECK:   [[VQABSQ_V2_I:%.*]] = bitcast <4 x i32> [[VQABSQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQABSQ_V1_I]]
int32x4_t test_vqabsq_s32(int32x4_t a) {
  return vqabsq_s32(a);
}

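// vqadd/vqaddq: saturating add. These lower to the target-independent
// @llvm.sadd.sat / @llvm.uadd.sat intrinsics rather than an ARM-specific
// one, as the checks below verify.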
// CHECK-LABEL: @test_vqadd_s8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
int8x8_t test_vqadd_s8(int8x8_t a, int8x8_t b) {
  return vqadd_s8(a, b);
}

// CHECK-LABEL: @test_vqadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
int16x4_t test_vqadd_s16(int16x4_t a, int16x4_t b) {
  return vqadd_s16(a, b);
}

// CHECK-LABEL: @test_vqadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
int32x2_t test_vqadd_s32(int32x2_t a, int32x2_t b) {
  return vqadd_s32(a, b);
}

// CHECK-LABEL: @test_vqadd_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.sadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
int64x1_t test_vqadd_s64(int64x1_t a, int64x1_t b) {
  return vqadd_s64(a, b);
}

// CHECK-LABEL: @test_vqadd_u8(
// CHECK:   [[VQADD_V_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQADD_V_I]]
uint8x8_t test_vqadd_u8(uint8x8_t a, uint8x8_t b) {
  return vqadd_u8(a, b);
}

// CHECK-LABEL: @test_vqadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <4 x i16> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQADD_V2_I]]
uint16x4_t test_vqadd_u16(uint16x4_t a, uint16x4_t b) {
  return vqadd_u16(a, b);
}

// CHECK-LABEL: @test_vqadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <2 x i32> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQADD_V2_I]]
uint32x2_t test_vqadd_u32(uint32x2_t a, uint32x2_t b) {
  return vqadd_u32(a, b);
}

// CHECK-LABEL: @test_vqadd_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQADD_V2_I:%.*]] = call <1 x i64> @llvm.uadd.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQADD_V3_I:%.*]] = bitcast <1 x i64> [[VQADD_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQADD_V2_I]]
uint64x1_t test_vqadd_u64(uint64x1_t a, uint64x1_t b) {
  return vqadd_u64(a, b);
}

// CHECK-LABEL: @test_vqaddq_s8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
int8x16_t test_vqaddq_s8(int8x16_t a, int8x16_t b) {
  return vqaddq_s8(a, b);
}

// CHECK-LABEL: @test_vqaddq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
int16x8_t test_vqaddq_s16(int16x8_t a, int16x8_t b) {
  return vqaddq_s16(a, b);
}

// CHECK-LABEL: @test_vqaddq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
int32x4_t test_vqaddq_s32(int32x4_t a, int32x4_t b) {
  return vqaddq_s32(a, b);
}

// CHECK-LABEL: @test_vqaddq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
int64x2_t test_vqaddq_s64(int64x2_t a, int64x2_t b) {
  return vqaddq_s64(a, b);
}

// CHECK-LABEL: @test_vqaddq_u8(
// CHECK:   [[VQADDQ_V_I:%.*]] = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQADDQ_V_I]]
uint8x16_t test_vqaddq_u8(uint8x16_t a, uint8x16_t b) {
  return vqaddq_u8(a, b);
}

// CHECK-LABEL: @test_vqaddq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQADDQ_V2_I]]
uint16x8_t test_vqaddq_u16(uint16x8_t a, uint16x8_t b) {
  return vqaddq_u16(a, b);
}

// CHECK-LABEL: @test_vqaddq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <4 x i32> @llvm.uadd.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQADDQ_V2_I]]
uint32x4_t test_vqaddq_u32(uint32x4_t a, uint32x4_t b) {
  return vqaddq_u32(a, b);
}

// CHECK-LABEL: @test_vqaddq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQADDQ_V2_I:%.*]] = call <2 x i64> @llvm.uadd.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQADDQ_V3_I:%.*]] = bitcast <2 x i64> [[VQADDQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQADDQ_V2_I]]
uint64x2_t test_vqaddq_u64(uint64x2_t a, uint64x2_t b) {
  return vqaddq_u64(a, b);
}

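// vqdmlal: saturating doubling multiply-accumulate long. CodeGen splits it
// into the widening doubling multiply @llvm.arm.neon.vqdmull followed by a
// saturating accumulate with the generic @llvm.sadd.sat; the _lane_ forms
// splat one lane of c with a shufflevector first, and the _n_ forms build
// the splat with insertelement chains.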
// CHECK-LABEL: @test_vqdmlal_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V3_I]]
int32x4_t test_vqdmlal_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlal_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vqdmlal_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK:   [[VQDMLAL_V3_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V3_I]]
int64x2_t test_vqdmlal_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlal_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vqdmlal_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQDMLAL_V6_I:%.*]] = call <4 x i32> @llvm.sadd.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
// CHECK:   ret <4 x i32> [[VQDMLAL_V6_I]]
int32x4_t test_vqdmlal_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlal_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlal_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQDMLAL_V4_I:%.*]] = call <2 x i64> @llvm.sadd.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
// CHECK:   ret <2 x i64> [[VQDMLAL_V4_I]]
int64x2_t test_vqdmlal_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlal_n_s32(a, b, c);
}

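// vqdmlsl: the subtracting counterpart of vqdmlal; the same vqdmull step,
// with the accumulate done through the generic @llvm.ssub.sat.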
// CHECK-LABEL: @test_vqdmlsl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %c to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> %c)
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_s32(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[B]], <4 x i16> [[LANE]])
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> [[A]], <4 x i32> [[VQDMLAL2_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V3_I]]
int32x4_t test_vqdmlsl_lane_s16(int32x4_t a, int16x4_t b, int16x4_t c) {
  return vqdmlsl_lane_s16(a, b, c, 3);
}

// CHECK-LABEL: @test_vqdmlsl_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[C:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP4:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMLAL2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[B]], <2 x i32> [[LANE]])
// CHECK:   [[VQDMLSL_V3_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> [[A]], <2 x i64> [[VQDMLAL2_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V3_I]]
int64x2_t test_vqdmlsl_lane_s32(int64x2_t a, int32x2_t b, int32x2_t c) {
  return vqdmlsl_lane_s32(a, b, c, 1);
}

// CHECK-LABEL: @test_vqdmlsl_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %c, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %c, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %c, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQDMLAL5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %b, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQDMLSL_V6_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> [[VQDMLAL5_I]])
// CHECK:   ret <4 x i32> [[VQDMLSL_V6_I]]
int32x4_t test_vqdmlsl_n_s16(int32x4_t a, int16x4_t b, int16_t c) {
  return vqdmlsl_n_s16(a, b, c);
}

// CHECK-LABEL: @test_vqdmlsl_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %c, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %c, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQDMLAL3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %b, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQDMLSL_V4_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> [[VQDMLAL3_I]])
// CHECK:   ret <2 x i64> [[VQDMLSL_V4_I]]
int64x2_t test_vqdmlsl_n_s32(int64x2_t a, int32x2_t b, int32_t c) {
  return vqdmlsl_n_s32(a, b, c);
}

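// vqdmulh/vqdmulhq: saturating doubling multiply returning the high half,
// via @llvm.arm.neon.vqdmulh. The _lane_ variants splat the selected lane
// of b with a shufflevector; the _n_ variants splat the scalar with an
// insertelement chain.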
10002 // CHECK-LABEL: @test_vqdmulh_s16(
10003 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
10004 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
10005 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
10006 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
10007 // CHECK:   ret <4 x i16> [[VQDMULH_V2_I]]
test_vqdmulh_s16(int16x4_t a,int16x4_t b)10008 int16x4_t test_vqdmulh_s16(int16x4_t a, int16x4_t b) {
10009   return vqdmulh_s16(a, b);
10010 }
10011 
10012 // CHECK-LABEL: @test_vqdmulh_s32(
10013 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
10014 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
10015 // CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
10016 // CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
10017 // CHECK:   ret <2 x i32> [[VQDMULH_V2_I]]
test_vqdmulh_s32(int32x2_t a,int32x2_t b)10018 int32x2_t test_vqdmulh_s32(int32x2_t a, int32x2_t b) {
10019   return vqdmulh_s32(a, b);
10020 }
10021 
10022 // CHECK-LABEL: @test_vqdmulhq_s16(
10023 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
10024 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
10025 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
10026 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
10027 // CHECK:   ret <8 x i16> [[VQDMULHQ_V2_I]]
test_vqdmulhq_s16(int16x8_t a,int16x8_t b)10028 int16x8_t test_vqdmulhq_s16(int16x8_t a, int16x8_t b) {
10029   return vqdmulhq_s16(a, b);
10030 }
10031 
10032 // CHECK-LABEL: @test_vqdmulhq_s32(
10033 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
10034 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
10035 // CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
10036 // CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
10037 // CHECK:   ret <4 x i32> [[VQDMULHQ_V2_I]]
test_vqdmulhq_s32(int32x4_t a,int32x4_t b)10038 int32x4_t test_vqdmulhq_s32(int32x4_t a, int32x4_t b) {
10039   return vqdmulhq_s32(a, b);
10040 }
10041 
// CHECK-LABEL: @test_vqdmulh_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQDMULH_V2_I]]
int16x4_t test_vqdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqdmulh_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqdmulh_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK:   [[VQDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQDMULH_V2_I]]
int32x2_t test_vqdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqdmulh_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqdmulhq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
// CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]])
// CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQDMULHQ_V2_I]]
int16x8_t test_vqdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqdmulhq_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqdmulhq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
// CHECK:   [[VQDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]])
// CHECK:   [[VQDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULHQ_V2_I]]
int32x4_t test_vqdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqdmulhq_lane_s32(a, b, 1);
}

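// The _n forms splat the scalar operand across all lanes with a chain of
// insertelement instructions before calling the vqdmulh intrinsic.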
// CHECK-LABEL: @test_vqdmulh_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQDMULH_V5_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQDMULH_V5_I]]
int16x4_t test_vqdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqdmulh_n_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulh_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQDMULH_V3_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQDMULH_V3_I]]
int32x2_t test_vqdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqdmulh_n_s32(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK:   [[VQDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
// CHECK:   [[VQDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQDMULHQ_V9_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQDMULHQ_V9_I]]
int16x8_t test_vqdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqdmulhq_n_s16(a, b);
}

// CHECK-LABEL: @test_vqdmulhq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK:   [[VQDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
// CHECK:   [[VQDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULHQ_V5_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULHQ_V5_I]]
int32x4_t test_vqdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqdmulhq_n_s32(a, b);
}

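// vqdmull: saturating doubling multiply-long; the result elements are twice
// the width of the source elements.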
// CHECK-LABEL: @test_vqdmull_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_s32(a, b);
}

// CHECK-LABEL: @test_vqdmull_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V2_I]]
int32x4_t test_vqdmull_lane_s16(int16x4_t a, int16x4_t b) {
  return vqdmull_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqdmull_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VQDMULL_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK:   [[VQDMULL_V3_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V2_I]]
int64x2_t test_vqdmull_lane_s32(int32x2_t a, int32x2_t b) {
  return vqdmull_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqdmull_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqdmull.v4i32(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQDMULL_V6_I:%.*]] = bitcast <4 x i32> [[VQDMULL_V5_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQDMULL_V5_I]]
int32x4_t test_vqdmull_n_s16(int16x4_t a, int16_t b) {
  return vqdmull_n_s16(a, b);
}

// CHECK-LABEL: @test_vqdmull_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQDMULL_V3_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqdmull.v2i64(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQDMULL_V4_I:%.*]] = bitcast <2 x i64> [[VQDMULL_V3_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQDMULL_V3_I]]
int64x2_t test_vqdmull_n_s32(int32x2_t a, int32_t b) {
  return vqdmull_n_s32(a, b);
}

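// vqmovn: saturating narrow to half the element width.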
// CHECK-LABEL: @test_vqmovn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovns.v8i8(<8 x i16> %a)
// CHECK:   ret <8 x i8> [[VQMOVN_V1_I]]
int8x8_t test_vqmovn_s16(int16x8_t a) {
  return vqmovn_s16(a);
}

// CHECK-LABEL: @test_vqmovn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovns.v4i16(<4 x i32> %a)
// CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQMOVN_V1_I]]
int16x4_t test_vqmovn_s32(int32x4_t a) {
  return vqmovn_s32(a);
}

// CHECK-LABEL: @test_vqmovn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovns.v2i32(<2 x i64> %a)
// CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQMOVN_V1_I]]
int32x2_t test_vqmovn_s64(int64x2_t a) {
  return vqmovn_s64(a);
}

// CHECK-LABEL: @test_vqmovn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnu.v8i8(<8 x i16> %a)
// CHECK:   ret <8 x i8> [[VQMOVN_V1_I]]
uint8x8_t test_vqmovn_u16(uint16x8_t a) {
  return vqmovn_u16(a);
}

// CHECK-LABEL: @test_vqmovn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnu.v4i16(<4 x i32> %a)
// CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQMOVN_V1_I]]
uint16x4_t test_vqmovn_u32(uint32x4_t a) {
  return vqmovn_u32(a);
}

// CHECK-LABEL: @test_vqmovn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQMOVN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnu.v2i32(<2 x i64> %a)
// CHECK:   [[VQMOVN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVN_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQMOVN_V1_I]]
uint32x2_t test_vqmovn_u64(uint64x2_t a) {
  return vqmovn_u64(a);
}

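// vqmovun: saturating narrow from signed input to an unsigned result.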
// CHECK-LABEL: @test_vqmovun_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqmovnsu.v8i8(<8 x i16> %a)
// CHECK:   ret <8 x i8> [[VQMOVUN_V1_I]]
uint8x8_t test_vqmovun_s16(int16x8_t a) {
  return vqmovun_s16(a);
}

// CHECK-LABEL: @test_vqmovun_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqmovnsu.v4i16(<4 x i32> %a)
// CHECK:   [[VQMOVUN_V2_I:%.*]] = bitcast <4 x i16> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQMOVUN_V1_I]]
uint16x4_t test_vqmovun_s32(int32x4_t a) {
  return vqmovun_s32(a);
}

// CHECK-LABEL: @test_vqmovun_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQMOVUN_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqmovnsu.v2i32(<2 x i64> %a)
// CHECK:   [[VQMOVUN_V2_I:%.*]] = bitcast <2 x i32> [[VQMOVUN_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQMOVUN_V1_I]]
uint32x2_t test_vqmovun_s64(int64x2_t a) {
  return vqmovun_s64(a);
}

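// vqneg: saturating negate, so the minimum signed value saturates to the
// maximum instead of wrapping.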
// CHECK-LABEL: @test_vqneg_s8(
// CHECK:   [[VQNEG_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqneg.v8i8(<8 x i8> %a)
// CHECK:   ret <8 x i8> [[VQNEG_V_I]]
int8x8_t test_vqneg_s8(int8x8_t a) {
  return vqneg_s8(a);
}

// CHECK-LABEL: @test_vqneg_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqneg.v4i16(<4 x i16> %a)
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <4 x i16> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQNEG_V1_I]]
int16x4_t test_vqneg_s16(int16x4_t a) {
  return vqneg_s16(a);
}

// CHECK-LABEL: @test_vqneg_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQNEG_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqneg.v2i32(<2 x i32> %a)
// CHECK:   [[VQNEG_V2_I:%.*]] = bitcast <2 x i32> [[VQNEG_V1_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQNEG_V1_I]]
int32x2_t test_vqneg_s32(int32x2_t a) {
  return vqneg_s32(a);
}

// CHECK-LABEL: @test_vqnegq_s8(
// CHECK:   [[VQNEGQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqneg.v16i8(<16 x i8> %a)
// CHECK:   ret <16 x i8> [[VQNEGQ_V_I]]
int8x16_t test_vqnegq_s8(int8x16_t a) {
  return vqnegq_s8(a);
}

// CHECK-LABEL: @test_vqnegq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqneg.v8i16(<8 x i16> %a)
// CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <8 x i16> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQNEGQ_V1_I]]
int16x8_t test_vqnegq_s16(int16x8_t a) {
  return vqnegq_s16(a);
}

// CHECK-LABEL: @test_vqnegq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQNEGQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqneg.v4i32(<4 x i32> %a)
// CHECK:   [[VQNEGQ_V2_I:%.*]] = bitcast <4 x i32> [[VQNEGQ_V1_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQNEGQ_V1_I]]
int32x4_t test_vqnegq_s32(int32x4_t a) {
  return vqnegq_s32(a);
}

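// vqrdmulh: the rounding variant of the saturating doubling multiply-high.
// The _lane and _n forms below use the same splat patterns as vqdmulh.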
// CHECK-LABEL: @test_vqrdmulh_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_s16(int16x8_t a, int16x8_t b) {
  return vqrdmulhq_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_s32(int32x4_t a, int32x4_t b) {
  return vqrdmulhq_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <4 x i32> <i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> [[A]], <4 x i16> [[LANE]])
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRDMULH_V2_I]]
int16x4_t test_vqrdmulh_lane_s16(int16x4_t a, int16x4_t b) {
  return vqrdmulh_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrdmulh_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <2 x i32> <i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <2 x i32> [[LANE]] to <8 x i8>
// CHECK:   [[VQRDMULH_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> [[A]], <2 x i32> [[LANE]])
// CHECK:   [[VQRDMULH_V3_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRDMULH_V2_I]]
int32x2_t test_vqrdmulh_lane_s32(int32x2_t a, int32x2_t b) {
  return vqrdmulh_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqrdmulhq_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[LANE:%.*]] = shufflevector <4 x i16> [[TMP1]], <4 x i16> [[TMP1]], <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> [[A]], <8 x i16> [[LANE]])
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRDMULHQ_V2_I]]
int16x8_t test_vqrdmulhq_lane_s16(int16x8_t a, int16x4_t b) {
  return vqrdmulhq_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vqrdmulhq_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> [[B:%.*]] to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[LANE:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> [[TMP1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast <4 x i32> [[LANE]] to <16 x i8>
// CHECK:   [[VQRDMULHQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> [[A]], <4 x i32> [[LANE]])
// CHECK:   [[VQRDMULHQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRDMULHQ_V2_I]]
int32x4_t test_vqrdmulhq_lane_s32(int32x4_t a, int32x2_t b) {
  return vqrdmulhq_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vqrdmulh_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to <8 x i8>
// CHECK:   [[VQRDMULH_V5_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16> %a, <4 x i16> [[VECINIT3_I]])
// CHECK:   [[VQRDMULH_V6_I:%.*]] = bitcast <4 x i16> [[VQRDMULH_V5_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRDMULH_V5_I]]
int16x4_t test_vqrdmulh_n_s16(int16x4_t a, int16_t b) {
  return vqrdmulh_n_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulh_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <2 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <2 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to <8 x i8>
// CHECK:   [[VQRDMULH_V3_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32> %a, <2 x i32> [[VECINIT1_I]])
// CHECK:   [[VQRDMULH_V4_I:%.*]] = bitcast <2 x i32> [[VQRDMULH_V3_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRDMULH_V3_I]]
int32x2_t test_vqrdmulh_n_s32(int32x2_t a, int32_t b) {
  return vqrdmulh_n_s32(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_n_s16(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <8 x i16> undef, i16 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <8 x i16> [[VECINIT_I]], i16 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <8 x i16> [[VECINIT1_I]], i16 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <8 x i16> [[VECINIT2_I]], i16 %b, i32 3
// CHECK:   [[VECINIT4_I:%.*]] = insertelement <8 x i16> [[VECINIT3_I]], i16 %b, i32 4
// CHECK:   [[VECINIT5_I:%.*]] = insertelement <8 x i16> [[VECINIT4_I]], i16 %b, i32 5
// CHECK:   [[VECINIT6_I:%.*]] = insertelement <8 x i16> [[VECINIT5_I]], i16 %b, i32 6
// CHECK:   [[VECINIT7_I:%.*]] = insertelement <8 x i16> [[VECINIT6_I]], i16 %b, i32 7
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> [[VECINIT7_I]] to <16 x i8>
// CHECK:   [[VQRDMULHQ_V9_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrdmulh.v8i16(<8 x i16> %a, <8 x i16> [[VECINIT7_I]])
// CHECK:   [[VQRDMULHQ_V10_I:%.*]] = bitcast <8 x i16> [[VQRDMULHQ_V9_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRDMULHQ_V9_I]]
int16x8_t test_vqrdmulhq_n_s16(int16x8_t a, int16_t b) {
  return vqrdmulhq_n_s16(a, b);
}

// CHECK-LABEL: @test_vqrdmulhq_n_s32(
// CHECK:   [[VECINIT_I:%.*]] = insertelement <4 x i32> undef, i32 %b, i32 0
// CHECK:   [[VECINIT1_I:%.*]] = insertelement <4 x i32> [[VECINIT_I]], i32 %b, i32 1
// CHECK:   [[VECINIT2_I:%.*]] = insertelement <4 x i32> [[VECINIT1_I]], i32 %b, i32 2
// CHECK:   [[VECINIT3_I:%.*]] = insertelement <4 x i32> [[VECINIT2_I]], i32 %b, i32 3
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> [[VECINIT3_I]] to <16 x i8>
// CHECK:   [[VQRDMULHQ_V5_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrdmulh.v4i32(<4 x i32> %a, <4 x i32> [[VECINIT3_I]])
// CHECK:   [[VQRDMULHQ_V6_I:%.*]] = bitcast <4 x i32> [[VQRDMULHQ_V5_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRDMULHQ_V5_I]]
int32x4_t test_vqrdmulhq_n_s32(int32x4_t a, int32_t b) {
  return vqrdmulhq_n_s32(a, b);
}

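// vqrshl: saturating rounding shift left by a per-lane signed count; negative
// counts shift right. The shift operand is signed even for the _u variants.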
// CHECK-LABEL: @test_vqrshl_s8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
int8x8_t test_vqrshl_s8(int8x8_t a, int8x8_t b) {
  return vqrshl_s8(a, b);
}

// CHECK-LABEL: @test_vqrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
int16x4_t test_vqrshl_s16(int16x4_t a, int16x4_t b) {
  return vqrshl_s16(a, b);
}

// CHECK-LABEL: @test_vqrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
int32x2_t test_vqrshl_s32(int32x2_t a, int32x2_t b) {
  return vqrshl_s32(a, b);
}

// CHECK-LABEL: @test_vqrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
int64x1_t test_vqrshl_s64(int64x1_t a, int64x1_t b) {
  return vqrshl_s64(a, b);
}

// CHECK-LABEL: @test_vqrshl_u8(
// CHECK:   [[VQRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQRSHL_V_I]]
uint8x8_t test_vqrshl_u8(uint8x8_t a, int8x8_t b) {
  return vqrshl_u8(a, b);
}

// CHECK-LABEL: @test_vqrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQRSHL_V2_I]]
uint16x4_t test_vqrshl_u16(uint16x4_t a, int16x4_t b) {
  return vqrshl_u16(a, b);
}

// CHECK-LABEL: @test_vqrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQRSHL_V2_I]]
uint32x2_t test_vqrshl_u32(uint32x2_t a, int32x2_t b) {
  return vqrshl_u32(a, b);
}

// CHECK-LABEL: @test_vqrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQRSHL_V2_I]]
uint64x1_t test_vqrshl_u64(uint64x1_t a, int64x1_t b) {
  return vqrshl_u64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
int8x16_t test_vqrshlq_s8(int8x16_t a, int8x16_t b) {
  return vqrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
int16x8_t test_vqrshlq_s16(int16x8_t a, int16x8_t b) {
  return vqrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
int32x4_t test_vqrshlq_s32(int32x4_t a, int32x4_t b) {
  return vqrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
int64x2_t test_vqrshlq_s64(int64x2_t a, int64x2_t b) {
  return vqrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u8(
// CHECK:   [[VQRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQRSHLQ_V_I]]
uint8x16_t test_vqrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQRSHLQ_V2_I]]
uint16x8_t test_vqrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQRSHLQ_V2_I]]
uint32x4_t test_vqrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQRSHLQ_V2_I]]
uint64x2_t test_vqrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqrshlq_u64(a, b);
}

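// vqrshrn_n: saturating rounding shift right and narrow. A right shift by n
// is lowered as a splat of -n fed to the narrowing shift intrinsic.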
// CHECK-LABEL: @test_vqrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftns.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
int8x8_t test_vqrshrn_n_s16(int16x8_t a) {
  return vqrshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftns.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
int16x4_t test_vqrshrn_n_s32(int32x4_t a) {
  return vqrshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftns.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
int32x2_t test_vqrshrn_n_s64(int64x2_t a) {
  return vqrshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnu.v8i8(<8 x i16> [[VQRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQRSHRN_N1]]
uint8x8_t test_vqrshrn_n_u16(uint16x8_t a) {
  return vqrshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnu.v4i16(<4 x i32> [[VQRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQRSHRN_N1]]
uint16x4_t test_vqrshrn_n_u32(uint32x4_t a) {
  return vqrshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqrshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnu.v2i32(<2 x i64> [[VQRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQRSHRN_N1]]
uint32x2_t test_vqrshrn_n_u64(uint64x2_t a) {
  return vqrshrn_n_u64(a, 1);
}

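// vqrshrun_n: like vqrshrn_n, but takes signed input and saturates to an
// unsigned result.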
// CHECK-LABEL: @test_vqrshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqrshiftnsu.v8i8(<8 x i16> [[VQRSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQRSHRUN_N1]]
uint8x8_t test_vqrshrun_n_s16(int16x8_t a) {
  return vqrshrun_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqrshiftnsu.v4i16(<4 x i32> [[VQRSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQRSHRUN_N1]]
uint16x4_t test_vqrshrun_n_s32(int32x4_t a) {
  return vqrshrun_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqrshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQRSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQRSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqrshiftnsu.v2i32(<2 x i64> [[VQRSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQRSHRUN_N1]]
uint32x2_t test_vqrshrun_n_s64(int64x2_t a) {
  return vqrshrun_n_s64(a, 1);
}

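// vqshl: saturating shift left by a per-lane signed count, without rounding.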
// CHECK-LABEL: @test_vqshl_s8(
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
int8x8_t test_vqshl_s8(int8x8_t a, int8x8_t b) {
  return vqshl_s8(a, b);
}

// CHECK-LABEL: @test_vqshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
int16x4_t test_vqshl_s16(int16x4_t a, int16x4_t b) {
  return vqshl_s16(a, b);
}

// CHECK-LABEL: @test_vqshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
int32x2_t test_vqshl_s32(int32x2_t a, int32x2_t b) {
  return vqshl_s32(a, b);
}

// CHECK-LABEL: @test_vqshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
int64x1_t test_vqshl_s64(int64x1_t a, int64x1_t b) {
  return vqshl_s64(a, b);
}

// CHECK-LABEL: @test_vqshl_u8(
// CHECK:   [[VQSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSHL_V_I]]
uint8x8_t test_vqshl_u8(uint8x8_t a, int8x8_t b) {
  return vqshl_u8(a, b);
}

// CHECK-LABEL: @test_vqshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <4 x i16> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSHL_V2_I]]
uint16x4_t test_vqshl_u16(uint16x4_t a, int16x4_t b) {
  return vqshl_u16(a, b);
}

// CHECK-LABEL: @test_vqshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <2 x i32> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSHL_V2_I]]
uint32x2_t test_vqshl_u32(uint32x2_t a, int32x2_t b) {
  return vqshl_u32(a, b);
}

// CHECK-LABEL: @test_vqshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSHL_V3_I:%.*]] = bitcast <1 x i64> [[VQSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSHL_V2_I]]
uint64x1_t test_vqshl_u64(uint64x1_t a, int64x1_t b) {
  return vqshl_u64(a, b);
}

// CHECK-LABEL: @test_vqshlq_s8(
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
int8x16_t test_vqshlq_s8(int8x16_t a, int8x16_t b) {
  return vqshlq_s8(a, b);
}

// CHECK-LABEL: @test_vqshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
int16x8_t test_vqshlq_s16(int16x8_t a, int16x8_t b) {
  return vqshlq_s16(a, b);
}

// CHECK-LABEL: @test_vqshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
int32x4_t test_vqshlq_s32(int32x4_t a, int32x4_t b) {
  return vqshlq_s32(a, b);
}

// CHECK-LABEL: @test_vqshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
int64x2_t test_vqshlq_s64(int64x2_t a, int64x2_t b) {
  return vqshlq_s64(a, b);
}

// CHECK-LABEL: @test_vqshlq_u8(
// CHECK:   [[VQSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSHLQ_V_I]]
uint8x16_t test_vqshlq_u8(uint8x16_t a, int8x16_t b) {
  return vqshlq_u8(a, b);
}

// CHECK-LABEL: @test_vqshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSHLQ_V2_I]]
uint16x8_t test_vqshlq_u16(uint16x8_t a, int16x8_t b) {
  return vqshlq_u16(a, b);
}

// CHECK-LABEL: @test_vqshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSHLQ_V2_I]]
uint32x4_t test_vqshlq_u32(uint32x4_t a, int32x4_t b) {
  return vqshlq_u32(a, b);
}

// CHECK-LABEL: @test_vqshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSHLQ_V2_I]]
uint64x2_t test_vqshlq_u64(uint64x2_t a, int64x2_t b) {
  return vqshlq_u64(a, b);
}

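// vqshlu_n: shift signed input left by an immediate, saturating to the
// unsigned range; the immediate is lowered as a splat constant vector.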
// CHECK-LABEL: @test_vqshlu_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftsu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VQSHLU_N]]
uint8x8_t test_vqshlu_n_s8(int8x8_t a) {
  return vqshlu_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftsu.v4i16(<4 x i16> [[VQSHLU_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VQSHLU_N1]]
uint16x4_t test_vqshlu_n_s16(int16x4_t a) {
  return vqshlu_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftsu.v2i32(<2 x i32> [[VQSHLU_N]], <2 x i32> <i32 1, i32 1>)
// CHECK:   ret <2 x i32> [[VQSHLU_N1]]
uint32x2_t test_vqshlu_n_s32(int32x2_t a) {
  return vqshlu_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshlu_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftsu.v1i64(<1 x i64> [[VQSHLU_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHLU_N1]]
uint64x1_t test_vqshlu_n_s64(int64x1_t a) {
  return vqshlu_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s8(
// CHECK:   [[VQSHLU_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftsu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VQSHLU_N]]
uint8x16_t test_vqshluq_n_s8(int8x16_t a) {
  return vqshluq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHLU_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftsu.v8i16(<8 x i16> [[VQSHLU_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VQSHLU_N1]]
uint16x8_t test_vqshluq_n_s16(int16x8_t a) {
  return vqshluq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHLU_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftsu.v4i32(<4 x i32> [[VQSHLU_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VQSHLU_N1]]
uint32x4_t test_vqshluq_n_s32(int32x4_t a) {
  return vqshluq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshluq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHLU_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHLU_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftsu.v2i64(<2 x i64> [[VQSHLU_N]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VQSHLU_N1]]
uint64x2_t test_vqshluq_n_s64(int64x2_t a) {
  return vqshluq_n_s64(a, 1);
}

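// vqshl_n / vqshlq_n: saturating shift left by an immediate, lowered as a
// splat constant shift vector.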
10947 // CHECK-LABEL: @test_vqshl_n_s8(
10948 // CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
10949 // CHECK:   ret <8 x i8> [[VQSHL_N]]
test_vqshl_n_s8(int8x8_t a)10950 int8x8_t test_vqshl_n_s8(int8x8_t a) {
10951   return vqshl_n_s8(a, 1);
10952 }
10953 
10954 // CHECK-LABEL: @test_vqshl_n_s16(
10955 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
10956 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
10957 // CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshifts.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
10958 // CHECK:   ret <4 x i16> [[VQSHL_N1]]
test_vqshl_n_s16(int16x4_t a)10959 int16x4_t test_vqshl_n_s16(int16x4_t a) {
10960   return vqshl_n_s16(a, 1);
10961 }
10962 
10963 // CHECK-LABEL: @test_vqshl_n_s32(
10964 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
10965 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
10966 // CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshifts.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
10967 // CHECK:   ret <2 x i32> [[VQSHL_N1]]
test_vqshl_n_s32(int32x2_t a)10968 int32x2_t test_vqshl_n_s32(int32x2_t a) {
10969   return vqshl_n_s32(a, 1);
10970 }
10971 
10972 // CHECK-LABEL: @test_vqshl_n_s64(
10973 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
10974 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
10975 // CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshifts.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
10976 // CHECK:   ret <1 x i64> [[VQSHL_N1]]
test_vqshl_n_s64(int64x1_t a)10977 int64x1_t test_vqshl_n_s64(int64x1_t a) {
10978   return vqshl_n_s64(a, 1);
10979 }
10980 
10981 // CHECK-LABEL: @test_vqshl_n_u8(
10982 // CHECK:   [[VQSHL_N:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
10983 // CHECK:   ret <8 x i8> [[VQSHL_N]]
test_vqshl_n_u8(uint8x8_t a)10984 uint8x8_t test_vqshl_n_u8(uint8x8_t a) {
10985   return vqshl_n_u8(a, 1);
10986 }
10987 
10988 // CHECK-LABEL: @test_vqshl_n_u16(
10989 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
10990 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
10991 // CHECK:   [[VQSHL_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftu.v4i16(<4 x i16> [[VQSHL_N]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
10992 // CHECK:   ret <4 x i16> [[VQSHL_N1]]
test_vqshl_n_u16(uint16x4_t a)10993 uint16x4_t test_vqshl_n_u16(uint16x4_t a) {
10994   return vqshl_n_u16(a, 1);
10995 }
10996 
10997 // CHECK-LABEL: @test_vqshl_n_u32(
10998 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
10999 // CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
11000 // CHECK:   [[VQSHL_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftu.v2i32(<2 x i32> [[VQSHL_N]], <2 x i32> <i32 1, i32 1>)
11001 // CHECK:   ret <2 x i32> [[VQSHL_N1]]
test_vqshl_n_u32(uint32x2_t a)11002 uint32x2_t test_vqshl_n_u32(uint32x2_t a) {
11003   return vqshl_n_u32(a, 1);
11004 }
11005 
11006 // CHECK-LABEL: @test_vqshl_n_u64(
11007 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vqshiftu.v1i64(<1 x i64> [[VQSHL_N]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VQSHL_N1]]
uint64x1_t test_vqshl_n_u64(uint64x1_t a) {
  return vqshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s8(
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
int8x16_t test_vqshlq_n_s8(int8x16_t a) {
  return vqshlq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshifts.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
int16x8_t test_vqshlq_n_s16(int16x8_t a) {
  return vqshlq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshifts.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
int32x4_t test_vqshlq_n_s32(int32x4_t a) {
  return vqshlq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshifts.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
int64x2_t test_vqshlq_n_s64(int64x2_t a) {
  return vqshlq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u8(
// CHECK:   [[VQSHL_N:%.*]] = call <16 x i8> @llvm.arm.neon.vqshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VQSHL_N]]
uint8x16_t test_vqshlq_n_u8(uint8x16_t a) {
  return vqshlq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHL_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vqshiftu.v8i16(<8 x i16> [[VQSHL_N]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VQSHL_N1]]
uint16x8_t test_vqshlq_n_u16(uint16x8_t a) {
  return vqshlq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHL_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vqshiftu.v4i32(<4 x i32> [[VQSHL_N]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VQSHL_N1]]
uint32x4_t test_vqshlq_n_u32(uint32x4_t a) {
  return vqshlq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshlq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHL_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHL_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vqshiftu.v2i64(<2 x i64> [[VQSHL_N]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VQSHL_N1]]
uint64x2_t test_vqshlq_n_u64(uint64x2_t a) {
  return vqshlq_n_u64(a, 1);
}

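// A hedged aside, not part of the checked output: the _n_ saturating shifts
// clamp instead of discarding shifted-out bits, so doubling a signed byte lane
// already at 0x7f yields 0x7f rather than wrapping to 0xfe. The helper name is
// illustrative only.
static inline int8x16_t example_double_clamped(int8x16_t v) {
  return vqshlq_n_s8(v, 1); // per lane: min(max(2*v, -128), 127)
}
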
// CHECK-LABEL: @test_vqshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftns.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
int8x8_t test_vqshrn_n_s16(int16x8_t a) {
  return vqshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftns.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
int16x4_t test_vqshrn_n_s32(int32x4_t a) {
  return vqshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftns.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
int32x2_t test_vqshrn_n_s64(int64x2_t a) {
  return vqshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnu.v8i8(<8 x i16> [[VQSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQSHRN_N1]]
uint8x8_t test_vqshrn_n_u16(uint16x8_t a) {
  return vqshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnu.v4i16(<4 x i32> [[VQSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQSHRN_N1]]
uint16x4_t test_vqshrn_n_u32(uint32x4_t a) {
  return vqshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vqshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnu.v2i32(<2 x i64> [[VQSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQSHRN_N1]]
uint32x2_t test_vqshrn_n_u64(uint64x2_t a) {
  return vqshrn_n_u64(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vqshiftnsu.v8i8(<8 x i16> [[VQSHRUN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VQSHRUN_N1]]
uint8x8_t test_vqshrun_n_s16(int16x8_t a) {
  return vqshrun_n_s16(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vqshiftnsu.v4i16(<4 x i32> [[VQSHRUN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VQSHRUN_N1]]
uint16x4_t test_vqshrun_n_s32(int32x4_t a) {
  return vqshrun_n_s32(a, 1);
}

// CHECK-LABEL: @test_vqshrun_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VQSHRUN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VQSHRUN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vqshiftnsu.v2i32(<2 x i64> [[VQSHRUN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VQSHRUN_N1]]
uint32x2_t test_vqshrun_n_s64(int64x2_t a) {
  return vqshrun_n_s64(a, 1);
}

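// A hedged aside, not part of the checked output: vqshrn_n_s16 shifts each
// 16-bit lane right by the immediate and saturates the result into 8 bits,
// which is the usual way to drop the fractional bits of Q8.8 fixed-point data.
// The helper name is illustrative only.
static inline int8x8_t example_q8_8_to_int8(int16x8_t q8_8) {
  return vqshrn_n_s16(q8_8, 8); // discard 8 fractional bits, saturate to [-128, 127]
}
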
// CHECK-LABEL: @test_vqsub_s8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
int8x8_t test_vqsub_s8(int8x8_t a, int8x8_t b) {
  return vqsub_s8(a, b);
}

// CHECK-LABEL: @test_vqsub_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
int16x4_t test_vqsub_s16(int16x4_t a, int16x4_t b) {
  return vqsub_s16(a, b);
}

// CHECK-LABEL: @test_vqsub_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
int32x2_t test_vqsub_s32(int32x2_t a, int32x2_t b) {
  return vqsub_s32(a, b);
}

// CHECK-LABEL: @test_vqsub_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.ssub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
int64x1_t test_vqsub_s64(int64x1_t a, int64x1_t b) {
  return vqsub_s64(a, b);
}

// CHECK-LABEL: @test_vqsub_u8(
// CHECK:   [[VQSUB_V_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VQSUB_V_I]]
uint8x8_t test_vqsub_u8(uint8x8_t a, uint8x8_t b) {
  return vqsub_u8(a, b);
}

// CHECK-LABEL: @test_vqsub_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <4 x i16> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VQSUB_V2_I]]
uint16x4_t test_vqsub_u16(uint16x4_t a, uint16x4_t b) {
  return vqsub_u16(a, b);
}

// CHECK-LABEL: @test_vqsub_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <2 x i32> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VQSUB_V2_I]]
uint32x2_t test_vqsub_u32(uint32x2_t a, uint32x2_t b) {
  return vqsub_u32(a, b);
}

// CHECK-LABEL: @test_vqsub_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VQSUB_V2_I:%.*]] = call <1 x i64> @llvm.usub.sat.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VQSUB_V3_I:%.*]] = bitcast <1 x i64> [[VQSUB_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VQSUB_V2_I]]
uint64x1_t test_vqsub_u64(uint64x1_t a, uint64x1_t b) {
  return vqsub_u64(a, b);
}

// CHECK-LABEL: @test_vqsubq_s8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
int8x16_t test_vqsubq_s8(int8x16_t a, int8x16_t b) {
  return vqsubq_s8(a, b);
}

// CHECK-LABEL: @test_vqsubq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
int16x8_t test_vqsubq_s16(int16x8_t a, int16x8_t b) {
  return vqsubq_s16(a, b);
}

// CHECK-LABEL: @test_vqsubq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.ssub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
int32x4_t test_vqsubq_s32(int32x4_t a, int32x4_t b) {
  return vqsubq_s32(a, b);
}

// CHECK-LABEL: @test_vqsubq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.ssub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
int64x2_t test_vqsubq_s64(int64x2_t a, int64x2_t b) {
  return vqsubq_s64(a, b);
}

// CHECK-LABEL: @test_vqsubq_u8(
// CHECK:   [[VQSUBQ_V_I:%.*]] = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VQSUBQ_V_I]]
uint8x16_t test_vqsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vqsubq_u8(a, b);
}

// CHECK-LABEL: @test_vqsubq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <8 x i16> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VQSUBQ_V2_I]]
uint16x8_t test_vqsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vqsubq_u16(a, b);
}

// CHECK-LABEL: @test_vqsubq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <4 x i32> @llvm.usub.sat.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <4 x i32> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VQSUBQ_V2_I]]
uint32x4_t test_vqsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vqsubq_u32(a, b);
}

// CHECK-LABEL: @test_vqsubq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VQSUBQ_V2_I:%.*]] = call <2 x i64> @llvm.usub.sat.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VQSUBQ_V3_I:%.*]] = bitcast <2 x i64> [[VQSUBQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VQSUBQ_V2_I]]
uint64x2_t test_vqsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vqsubq_u64(a, b);
}

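// A hedged aside, not part of the checked output: saturating subtraction never
// wraps, so an unsigned lane already at 0 stays 0 after a decrement instead of
// becoming 255. The helper name is illustrative only.
static inline uint8x8_t example_clamped_decrement(uint8x8_t v) {
  return vqsub_u8(v, vdup_n_u8(1)); // per lane: max(v - 1, 0)
}
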
// CHECK-LABEL: @test_vraddhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
int8x8_t test_vraddhn_s16(int16x8_t a, int16x8_t b) {
  return vraddhn_s16(a, b);
}

// CHECK-LABEL: @test_vraddhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
int16x4_t test_vraddhn_s32(int32x4_t a, int32x4_t b) {
  return vraddhn_s32(a, b);
}

// CHECK-LABEL: @test_vraddhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
int32x2_t test_vraddhn_s64(int64x2_t a, int64x2_t b) {
  return vraddhn_s64(a, b);
}

// CHECK-LABEL: @test_vraddhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vraddhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRADDHN_V2_I]]
uint8x8_t test_vraddhn_u16(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b);
}

// CHECK-LABEL: @test_vraddhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vraddhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <4 x i16> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRADDHN_V2_I]]
uint16x4_t test_vraddhn_u32(uint32x4_t a, uint32x4_t b) {
  return vraddhn_u32(a, b);
}

// CHECK-LABEL: @test_vraddhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRADDHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vraddhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRADDHN_V3_I:%.*]] = bitcast <2 x i32> [[VRADDHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRADDHN_V2_I]]
uint32x2_t test_vraddhn_u64(uint64x2_t a, uint64x2_t b) {
  return vraddhn_u64(a, b);
}

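// A hedged aside, not part of the checked output: vraddhn adds the wide lanes
// and returns the *rounded* high halves, i.e. (a + b + 0x80) >> 8 for the
// 16-bit variants, narrowing a widened sum back to 8 bits in one step. The
// helper name is illustrative only.
static inline uint8x8_t example_rounded_high_sum(uint16x8_t a, uint16x8_t b) {
  return vraddhn_u16(a, b); // per lane: (uint8_t)((a + b + 0x80) >> 8)
}
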
// CHECK-LABEL: @test_vrecpe_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %a)
// CHECK:   ret <2 x float> [[VRECPE_V1_I]]
float32x2_t test_vrecpe_f32(float32x2_t a) {
  return vrecpe_f32(a);
}

// CHECK-LABEL: @test_vrecpe_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRECPE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32> %a)
// CHECK:   ret <2 x i32> [[VRECPE_V1_I]]
uint32x2_t test_vrecpe_u32(uint32x2_t a) {
  return vrecpe_u32(a);
}

// CHECK-LABEL: @test_vrecpeq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %a)
// CHECK:   ret <4 x float> [[VRECPEQ_V1_I]]
float32x4_t test_vrecpeq_f32(float32x4_t a) {
  return vrecpeq_f32(a);
}

// CHECK-LABEL: @test_vrecpeq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRECPEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32> %a)
// CHECK:   ret <4 x i32> [[VRECPEQ_V1_I]]
uint32x4_t test_vrecpeq_u32(uint32x4_t a) {
  return vrecpeq_u32(a);
}

// CHECK-LABEL: @test_vrecps_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VRECPS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrecps.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VRECPS_V3_I:%.*]] = bitcast <2 x float> [[VRECPS_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VRECPS_V2_I]]
float32x2_t test_vrecps_f32(float32x2_t a, float32x2_t b) {
  return vrecps_f32(a, b);
}

// CHECK-LABEL: @test_vrecpsq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VRECPSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrecps.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VRECPSQ_V3_I:%.*]] = bitcast <4 x float> [[VRECPSQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VRECPSQ_V2_I]]
float32x4_t test_vrecpsq_f32(float32x4_t a, float32x4_t b) {
  return vrecpsq_f32(a, b);
}

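// A hedged sketch of the intended vrecpe/vrecps pairing, not part of the
// checked output: vrecpe gives a rough (~8-bit) estimate of 1/a, and
// vrecps(a, x) computes 2.0f - a*x, so each multiply by it is one
// Newton-Raphson refinement. The helper name is illustrative only.
static inline float32x2_t example_reciprocal(float32x2_t a) {
  float32x2_t x = vrecpe_f32(a);     // initial estimate of 1/a
  x = vmul_f32(x, vrecps_f32(a, x)); // first refinement step
  x = vmul_f32(x, vrecps_f32(a, x)); // second step, near full float precision
  return x;
}
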
11422 // CHECK-LABEL: @test_vreinterpret_s8_s16(
11423 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11424 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_s16(int16x4_t a)11425 int8x8_t test_vreinterpret_s8_s16(int16x4_t a) {
11426   return vreinterpret_s8_s16(a);
11427 }
11428 
11429 // CHECK-LABEL: @test_vreinterpret_s8_s32(
11430 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11431 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_s32(int32x2_t a)11432 int8x8_t test_vreinterpret_s8_s32(int32x2_t a) {
11433   return vreinterpret_s8_s32(a);
11434 }
11435 
11436 // CHECK-LABEL: @test_vreinterpret_s8_s64(
11437 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11438 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_s64(int64x1_t a)11439 int8x8_t test_vreinterpret_s8_s64(int64x1_t a) {
11440   return vreinterpret_s8_s64(a);
11441 }
11442 
11443 // CHECK-LABEL: @test_vreinterpret_s8_u8(
11444 // CHECK:   ret <8 x i8> %a
test_vreinterpret_s8_u8(uint8x8_t a)11445 int8x8_t test_vreinterpret_s8_u8(uint8x8_t a) {
11446   return vreinterpret_s8_u8(a);
11447 }
11448 
11449 // CHECK-LABEL: @test_vreinterpret_s8_u16(
11450 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11451 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_u16(uint16x4_t a)11452 int8x8_t test_vreinterpret_s8_u16(uint16x4_t a) {
11453   return vreinterpret_s8_u16(a);
11454 }
11455 
11456 // CHECK-LABEL: @test_vreinterpret_s8_u32(
11457 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11458 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_u32(uint32x2_t a)11459 int8x8_t test_vreinterpret_s8_u32(uint32x2_t a) {
11460   return vreinterpret_s8_u32(a);
11461 }
11462 
11463 // CHECK-LABEL: @test_vreinterpret_s8_u64(
11464 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11465 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_u64(uint64x1_t a)11466 int8x8_t test_vreinterpret_s8_u64(uint64x1_t a) {
11467   return vreinterpret_s8_u64(a);
11468 }
11469 
11470 // CHECK-LABEL: @test_vreinterpret_s8_f16(
11471 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
11472 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_f16(float16x4_t a)11473 int8x8_t test_vreinterpret_s8_f16(float16x4_t a) {
11474   return vreinterpret_s8_f16(a);
11475 }
11476 
11477 // CHECK-LABEL: @test_vreinterpret_s8_f32(
11478 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
11479 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_f32(float32x2_t a)11480 int8x8_t test_vreinterpret_s8_f32(float32x2_t a) {
11481   return vreinterpret_s8_f32(a);
11482 }
11483 
11484 // CHECK-LABEL: @test_vreinterpret_s8_p8(
11485 // CHECK:   ret <8 x i8> %a
test_vreinterpret_s8_p8(poly8x8_t a)11486 int8x8_t test_vreinterpret_s8_p8(poly8x8_t a) {
11487   return vreinterpret_s8_p8(a);
11488 }
11489 
11490 // CHECK-LABEL: @test_vreinterpret_s8_p16(
11491 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11492 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_s8_p16(poly16x4_t a)11493 int8x8_t test_vreinterpret_s8_p16(poly16x4_t a) {
11494   return vreinterpret_s8_p16(a);
11495 }
11496 
11497 // CHECK-LABEL: @test_vreinterpret_s16_s8(
11498 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11499 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s8(int8x8_t a)11500 int16x4_t test_vreinterpret_s16_s8(int8x8_t a) {
11501   return vreinterpret_s16_s8(a);
11502 }
11503 
11504 // CHECK-LABEL: @test_vreinterpret_s16_s32(
11505 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11506 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s32(int32x2_t a)11507 int16x4_t test_vreinterpret_s16_s32(int32x2_t a) {
11508   return vreinterpret_s16_s32(a);
11509 }
11510 
11511 // CHECK-LABEL: @test_vreinterpret_s16_s64(
11512 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11513 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_s64(int64x1_t a)11514 int16x4_t test_vreinterpret_s16_s64(int64x1_t a) {
11515   return vreinterpret_s16_s64(a);
11516 }
11517 
11518 // CHECK-LABEL: @test_vreinterpret_s16_u8(
11519 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11520 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_u8(uint8x8_t a)11521 int16x4_t test_vreinterpret_s16_u8(uint8x8_t a) {
11522   return vreinterpret_s16_u8(a);
11523 }
11524 
11525 // CHECK-LABEL: @test_vreinterpret_s16_u16(
11526 // CHECK:   ret <4 x i16> %a
test_vreinterpret_s16_u16(uint16x4_t a)11527 int16x4_t test_vreinterpret_s16_u16(uint16x4_t a) {
11528   return vreinterpret_s16_u16(a);
11529 }
11530 
11531 // CHECK-LABEL: @test_vreinterpret_s16_u32(
11532 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11533 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_u32(uint32x2_t a)11534 int16x4_t test_vreinterpret_s16_u32(uint32x2_t a) {
11535   return vreinterpret_s16_u32(a);
11536 }
11537 
11538 // CHECK-LABEL: @test_vreinterpret_s16_u64(
11539 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11540 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_u64(uint64x1_t a)11541 int16x4_t test_vreinterpret_s16_u64(uint64x1_t a) {
11542   return vreinterpret_s16_u64(a);
11543 }
11544 
11545 // CHECK-LABEL: @test_vreinterpret_s16_f16(
11546 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
11547 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_f16(float16x4_t a)11548 int16x4_t test_vreinterpret_s16_f16(float16x4_t a) {
11549   return vreinterpret_s16_f16(a);
11550 }
11551 
11552 // CHECK-LABEL: @test_vreinterpret_s16_f32(
11553 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
11554 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_f32(float32x2_t a)11555 int16x4_t test_vreinterpret_s16_f32(float32x2_t a) {
11556   return vreinterpret_s16_f32(a);
11557 }
11558 
11559 // CHECK-LABEL: @test_vreinterpret_s16_p8(
11560 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11561 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_s16_p8(poly8x8_t a)11562 int16x4_t test_vreinterpret_s16_p8(poly8x8_t a) {
11563   return vreinterpret_s16_p8(a);
11564 }
11565 
11566 // CHECK-LABEL: @test_vreinterpret_s16_p16(
11567 // CHECK:   ret <4 x i16> %a
test_vreinterpret_s16_p16(poly16x4_t a)11568 int16x4_t test_vreinterpret_s16_p16(poly16x4_t a) {
11569   return vreinterpret_s16_p16(a);
11570 }
11571 
11572 // CHECK-LABEL: @test_vreinterpret_s32_s8(
11573 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11574 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_s8(int8x8_t a)11575 int32x2_t test_vreinterpret_s32_s8(int8x8_t a) {
11576   return vreinterpret_s32_s8(a);
11577 }
11578 
11579 // CHECK-LABEL: @test_vreinterpret_s32_s16(
11580 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11581 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_s16(int16x4_t a)11582 int32x2_t test_vreinterpret_s32_s16(int16x4_t a) {
11583   return vreinterpret_s32_s16(a);
11584 }
11585 
11586 // CHECK-LABEL: @test_vreinterpret_s32_s64(
11587 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11588 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_s64(int64x1_t a)11589 int32x2_t test_vreinterpret_s32_s64(int64x1_t a) {
11590   return vreinterpret_s32_s64(a);
11591 }
11592 
11593 // CHECK-LABEL: @test_vreinterpret_s32_u8(
11594 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11595 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_u8(uint8x8_t a)11596 int32x2_t test_vreinterpret_s32_u8(uint8x8_t a) {
11597   return vreinterpret_s32_u8(a);
11598 }
11599 
11600 // CHECK-LABEL: @test_vreinterpret_s32_u16(
11601 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11602 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_u16(uint16x4_t a)11603 int32x2_t test_vreinterpret_s32_u16(uint16x4_t a) {
11604   return vreinterpret_s32_u16(a);
11605 }
11606 
11607 // CHECK-LABEL: @test_vreinterpret_s32_u32(
11608 // CHECK:   ret <2 x i32> %a
test_vreinterpret_s32_u32(uint32x2_t a)11609 int32x2_t test_vreinterpret_s32_u32(uint32x2_t a) {
11610   return vreinterpret_s32_u32(a);
11611 }
11612 
11613 // CHECK-LABEL: @test_vreinterpret_s32_u64(
11614 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11615 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_u64(uint64x1_t a)11616 int32x2_t test_vreinterpret_s32_u64(uint64x1_t a) {
11617   return vreinterpret_s32_u64(a);
11618 }
11619 
11620 // CHECK-LABEL: @test_vreinterpret_s32_f16(
11621 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
11622 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_f16(float16x4_t a)11623 int32x2_t test_vreinterpret_s32_f16(float16x4_t a) {
11624   return vreinterpret_s32_f16(a);
11625 }
11626 
11627 // CHECK-LABEL: @test_vreinterpret_s32_f32(
11628 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
11629 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_f32(float32x2_t a)11630 int32x2_t test_vreinterpret_s32_f32(float32x2_t a) {
11631   return vreinterpret_s32_f32(a);
11632 }
11633 
11634 // CHECK-LABEL: @test_vreinterpret_s32_p8(
11635 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11636 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_p8(poly8x8_t a)11637 int32x2_t test_vreinterpret_s32_p8(poly8x8_t a) {
11638   return vreinterpret_s32_p8(a);
11639 }
11640 
11641 // CHECK-LABEL: @test_vreinterpret_s32_p16(
11642 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11643 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_s32_p16(poly16x4_t a)11644 int32x2_t test_vreinterpret_s32_p16(poly16x4_t a) {
11645   return vreinterpret_s32_p16(a);
11646 }
11647 
11648 // CHECK-LABEL: @test_vreinterpret_s64_s8(
11649 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11650 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_s8(int8x8_t a)11651 int64x1_t test_vreinterpret_s64_s8(int8x8_t a) {
11652   return vreinterpret_s64_s8(a);
11653 }
11654 
11655 // CHECK-LABEL: @test_vreinterpret_s64_s16(
11656 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11657 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_s16(int16x4_t a)11658 int64x1_t test_vreinterpret_s64_s16(int16x4_t a) {
11659   return vreinterpret_s64_s16(a);
11660 }
11661 
11662 // CHECK-LABEL: @test_vreinterpret_s64_s32(
11663 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11664 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_s32(int32x2_t a)11665 int64x1_t test_vreinterpret_s64_s32(int32x2_t a) {
11666   return vreinterpret_s64_s32(a);
11667 }
11668 
11669 // CHECK-LABEL: @test_vreinterpret_s64_u8(
11670 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11671 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u8(uint8x8_t a)11672 int64x1_t test_vreinterpret_s64_u8(uint8x8_t a) {
11673   return vreinterpret_s64_u8(a);
11674 }
11675 
11676 // CHECK-LABEL: @test_vreinterpret_s64_u16(
11677 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11678 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u16(uint16x4_t a)11679 int64x1_t test_vreinterpret_s64_u16(uint16x4_t a) {
11680   return vreinterpret_s64_u16(a);
11681 }
11682 
11683 // CHECK-LABEL: @test_vreinterpret_s64_u32(
11684 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11685 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_u32(uint32x2_t a)11686 int64x1_t test_vreinterpret_s64_u32(uint32x2_t a) {
11687   return vreinterpret_s64_u32(a);
11688 }
11689 
11690 // CHECK-LABEL: @test_vreinterpret_s64_u64(
11691 // CHECK:   ret <1 x i64> %a
test_vreinterpret_s64_u64(uint64x1_t a)11692 int64x1_t test_vreinterpret_s64_u64(uint64x1_t a) {
11693   return vreinterpret_s64_u64(a);
11694 }
11695 
11696 // CHECK-LABEL: @test_vreinterpret_s64_f16(
11697 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
11698 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_f16(float16x4_t a)11699 int64x1_t test_vreinterpret_s64_f16(float16x4_t a) {
11700   return vreinterpret_s64_f16(a);
11701 }
11702 
11703 // CHECK-LABEL: @test_vreinterpret_s64_f32(
11704 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
11705 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_f32(float32x2_t a)11706 int64x1_t test_vreinterpret_s64_f32(float32x2_t a) {
11707   return vreinterpret_s64_f32(a);
11708 }
11709 
11710 // CHECK-LABEL: @test_vreinterpret_s64_p8(
11711 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11712 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_p8(poly8x8_t a)11713 int64x1_t test_vreinterpret_s64_p8(poly8x8_t a) {
11714   return vreinterpret_s64_p8(a);
11715 }
11716 
11717 // CHECK-LABEL: @test_vreinterpret_s64_p16(
11718 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11719 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_s64_p16(poly16x4_t a)11720 int64x1_t test_vreinterpret_s64_p16(poly16x4_t a) {
11721   return vreinterpret_s64_p16(a);
11722 }
11723 
11724 // CHECK-LABEL: @test_vreinterpret_u8_s8(
11725 // CHECK:   ret <8 x i8> %a
test_vreinterpret_u8_s8(int8x8_t a)11726 uint8x8_t test_vreinterpret_u8_s8(int8x8_t a) {
11727   return vreinterpret_u8_s8(a);
11728 }
11729 
11730 // CHECK-LABEL: @test_vreinterpret_u8_s16(
11731 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11732 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s16(int16x4_t a)11733 uint8x8_t test_vreinterpret_u8_s16(int16x4_t a) {
11734   return vreinterpret_u8_s16(a);
11735 }
11736 
11737 // CHECK-LABEL: @test_vreinterpret_u8_s32(
11738 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11739 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s32(int32x2_t a)11740 uint8x8_t test_vreinterpret_u8_s32(int32x2_t a) {
11741   return vreinterpret_u8_s32(a);
11742 }
11743 
11744 // CHECK-LABEL: @test_vreinterpret_u8_s64(
11745 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11746 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_s64(int64x1_t a)11747 uint8x8_t test_vreinterpret_u8_s64(int64x1_t a) {
11748   return vreinterpret_u8_s64(a);
11749 }
11750 
11751 // CHECK-LABEL: @test_vreinterpret_u8_u16(
11752 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11753 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u16(uint16x4_t a)11754 uint8x8_t test_vreinterpret_u8_u16(uint16x4_t a) {
11755   return vreinterpret_u8_u16(a);
11756 }
11757 
11758 // CHECK-LABEL: @test_vreinterpret_u8_u32(
11759 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
11760 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u32(uint32x2_t a)11761 uint8x8_t test_vreinterpret_u8_u32(uint32x2_t a) {
11762   return vreinterpret_u8_u32(a);
11763 }
11764 
11765 // CHECK-LABEL: @test_vreinterpret_u8_u64(
11766 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
11767 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_u64(uint64x1_t a)11768 uint8x8_t test_vreinterpret_u8_u64(uint64x1_t a) {
11769   return vreinterpret_u8_u64(a);
11770 }
11771 
11772 // CHECK-LABEL: @test_vreinterpret_u8_f16(
11773 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
11774 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_f16(float16x4_t a)11775 uint8x8_t test_vreinterpret_u8_f16(float16x4_t a) {
11776   return vreinterpret_u8_f16(a);
11777 }
11778 
11779 // CHECK-LABEL: @test_vreinterpret_u8_f32(
11780 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
11781 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_f32(float32x2_t a)11782 uint8x8_t test_vreinterpret_u8_f32(float32x2_t a) {
11783   return vreinterpret_u8_f32(a);
11784 }
11785 
11786 // CHECK-LABEL: @test_vreinterpret_u8_p8(
11787 // CHECK:   ret <8 x i8> %a
test_vreinterpret_u8_p8(poly8x8_t a)11788 uint8x8_t test_vreinterpret_u8_p8(poly8x8_t a) {
11789   return vreinterpret_u8_p8(a);
11790 }
11791 
11792 // CHECK-LABEL: @test_vreinterpret_u8_p16(
11793 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
11794 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_u8_p16(poly16x4_t a)11795 uint8x8_t test_vreinterpret_u8_p16(poly16x4_t a) {
11796   return vreinterpret_u8_p16(a);
11797 }
11798 
11799 // CHECK-LABEL: @test_vreinterpret_u16_s8(
11800 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11801 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s8(int8x8_t a)11802 uint16x4_t test_vreinterpret_u16_s8(int8x8_t a) {
11803   return vreinterpret_u16_s8(a);
11804 }
11805 
11806 // CHECK-LABEL: @test_vreinterpret_u16_s16(
11807 // CHECK:   ret <4 x i16> %a
test_vreinterpret_u16_s16(int16x4_t a)11808 uint16x4_t test_vreinterpret_u16_s16(int16x4_t a) {
11809   return vreinterpret_u16_s16(a);
11810 }
11811 
11812 // CHECK-LABEL: @test_vreinterpret_u16_s32(
11813 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11814 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s32(int32x2_t a)11815 uint16x4_t test_vreinterpret_u16_s32(int32x2_t a) {
11816   return vreinterpret_u16_s32(a);
11817 }
11818 
11819 // CHECK-LABEL: @test_vreinterpret_u16_s64(
11820 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11821 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_s64(int64x1_t a)11822 uint16x4_t test_vreinterpret_u16_s64(int64x1_t a) {
11823   return vreinterpret_u16_s64(a);
11824 }
11825 
11826 // CHECK-LABEL: @test_vreinterpret_u16_u8(
11827 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11828 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u8(uint8x8_t a)11829 uint16x4_t test_vreinterpret_u16_u8(uint8x8_t a) {
11830   return vreinterpret_u16_u8(a);
11831 }
11832 
11833 // CHECK-LABEL: @test_vreinterpret_u16_u32(
11834 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
11835 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u32(uint32x2_t a)11836 uint16x4_t test_vreinterpret_u16_u32(uint32x2_t a) {
11837   return vreinterpret_u16_u32(a);
11838 }
11839 
11840 // CHECK-LABEL: @test_vreinterpret_u16_u64(
11841 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
11842 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_u64(uint64x1_t a)11843 uint16x4_t test_vreinterpret_u16_u64(uint64x1_t a) {
11844   return vreinterpret_u16_u64(a);
11845 }
11846 
11847 // CHECK-LABEL: @test_vreinterpret_u16_f16(
11848 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
11849 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_f16(float16x4_t a)11850 uint16x4_t test_vreinterpret_u16_f16(float16x4_t a) {
11851   return vreinterpret_u16_f16(a);
11852 }
11853 
11854 // CHECK-LABEL: @test_vreinterpret_u16_f32(
11855 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
11856 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_f32(float32x2_t a)11857 uint16x4_t test_vreinterpret_u16_f32(float32x2_t a) {
11858   return vreinterpret_u16_f32(a);
11859 }
11860 
11861 // CHECK-LABEL: @test_vreinterpret_u16_p8(
11862 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
11863 // CHECK:   ret <4 x i16> [[TMP0]]
test_vreinterpret_u16_p8(poly8x8_t a)11864 uint16x4_t test_vreinterpret_u16_p8(poly8x8_t a) {
11865   return vreinterpret_u16_p8(a);
11866 }
11867 
11868 // CHECK-LABEL: @test_vreinterpret_u16_p16(
11869 // CHECK:   ret <4 x i16> %a
test_vreinterpret_u16_p16(poly16x4_t a)11870 uint16x4_t test_vreinterpret_u16_p16(poly16x4_t a) {
11871   return vreinterpret_u16_p16(a);
11872 }
11873 
11874 // CHECK-LABEL: @test_vreinterpret_u32_s8(
11875 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11876 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s8(int8x8_t a)11877 uint32x2_t test_vreinterpret_u32_s8(int8x8_t a) {
11878   return vreinterpret_u32_s8(a);
11879 }
11880 
11881 // CHECK-LABEL: @test_vreinterpret_u32_s16(
11882 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11883 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s16(int16x4_t a)11884 uint32x2_t test_vreinterpret_u32_s16(int16x4_t a) {
11885   return vreinterpret_u32_s16(a);
11886 }
11887 
11888 // CHECK-LABEL: @test_vreinterpret_u32_s32(
11889 // CHECK:   ret <2 x i32> %a
test_vreinterpret_u32_s32(int32x2_t a)11890 uint32x2_t test_vreinterpret_u32_s32(int32x2_t a) {
11891   return vreinterpret_u32_s32(a);
11892 }
11893 
11894 // CHECK-LABEL: @test_vreinterpret_u32_s64(
11895 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11896 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_s64(int64x1_t a)11897 uint32x2_t test_vreinterpret_u32_s64(int64x1_t a) {
11898   return vreinterpret_u32_s64(a);
11899 }
11900 
11901 // CHECK-LABEL: @test_vreinterpret_u32_u8(
11902 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11903 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u8(uint8x8_t a)11904 uint32x2_t test_vreinterpret_u32_u8(uint8x8_t a) {
11905   return vreinterpret_u32_u8(a);
11906 }
11907 
11908 // CHECK-LABEL: @test_vreinterpret_u32_u16(
11909 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11910 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u16(uint16x4_t a)11911 uint32x2_t test_vreinterpret_u32_u16(uint16x4_t a) {
11912   return vreinterpret_u32_u16(a);
11913 }
11914 
11915 // CHECK-LABEL: @test_vreinterpret_u32_u64(
11916 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x i32>
11917 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_u64(uint64x1_t a)11918 uint32x2_t test_vreinterpret_u32_u64(uint64x1_t a) {
11919   return vreinterpret_u32_u64(a);
11920 }
11921 
11922 // CHECK-LABEL: @test_vreinterpret_u32_f16(
11923 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x i32>
11924 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_f16(float16x4_t a)11925 uint32x2_t test_vreinterpret_u32_f16(float16x4_t a) {
11926   return vreinterpret_u32_f16(a);
11927 }
11928 
11929 // CHECK-LABEL: @test_vreinterpret_u32_f32(
11930 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <2 x i32>
11931 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_f32(float32x2_t a)11932 uint32x2_t test_vreinterpret_u32_f32(float32x2_t a) {
11933   return vreinterpret_u32_f32(a);
11934 }
11935 
11936 // CHECK-LABEL: @test_vreinterpret_u32_p8(
11937 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x i32>
11938 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_p8(poly8x8_t a)11939 uint32x2_t test_vreinterpret_u32_p8(poly8x8_t a) {
11940   return vreinterpret_u32_p8(a);
11941 }
11942 
11943 // CHECK-LABEL: @test_vreinterpret_u32_p16(
11944 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x i32>
11945 // CHECK:   ret <2 x i32> [[TMP0]]
test_vreinterpret_u32_p16(poly16x4_t a)11946 uint32x2_t test_vreinterpret_u32_p16(poly16x4_t a) {
11947   return vreinterpret_u32_p16(a);
11948 }
11949 
11950 // CHECK-LABEL: @test_vreinterpret_u64_s8(
11951 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11952 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s8(int8x8_t a)11953 uint64x1_t test_vreinterpret_u64_s8(int8x8_t a) {
11954   return vreinterpret_u64_s8(a);
11955 }
11956 
11957 // CHECK-LABEL: @test_vreinterpret_u64_s16(
11958 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11959 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s16(int16x4_t a)11960 uint64x1_t test_vreinterpret_u64_s16(int16x4_t a) {
11961   return vreinterpret_u64_s16(a);
11962 }
11963 
11964 // CHECK-LABEL: @test_vreinterpret_u64_s32(
11965 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11966 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_s32(int32x2_t a)11967 uint64x1_t test_vreinterpret_u64_s32(int32x2_t a) {
11968   return vreinterpret_u64_s32(a);
11969 }
11970 
11971 // CHECK-LABEL: @test_vreinterpret_u64_s64(
11972 // CHECK:   ret <1 x i64> %a
test_vreinterpret_u64_s64(int64x1_t a)11973 uint64x1_t test_vreinterpret_u64_s64(int64x1_t a) {
11974   return vreinterpret_u64_s64(a);
11975 }
11976 
11977 // CHECK-LABEL: @test_vreinterpret_u64_u8(
11978 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
11979 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u8(uint8x8_t a)11980 uint64x1_t test_vreinterpret_u64_u8(uint8x8_t a) {
11981   return vreinterpret_u64_u8(a);
11982 }
11983 
11984 // CHECK-LABEL: @test_vreinterpret_u64_u16(
11985 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
11986 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u16(uint16x4_t a)11987 uint64x1_t test_vreinterpret_u64_u16(uint16x4_t a) {
11988   return vreinterpret_u64_u16(a);
11989 }
11990 
11991 // CHECK-LABEL: @test_vreinterpret_u64_u32(
11992 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <1 x i64>
11993 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_u32(uint32x2_t a)11994 uint64x1_t test_vreinterpret_u64_u32(uint32x2_t a) {
11995   return vreinterpret_u64_u32(a);
11996 }
11997 
11998 // CHECK-LABEL: @test_vreinterpret_u64_f16(
11999 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <1 x i64>
12000 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f16(float16x4_t a)12001 uint64x1_t test_vreinterpret_u64_f16(float16x4_t a) {
12002   return vreinterpret_u64_f16(a);
12003 }
12004 
12005 // CHECK-LABEL: @test_vreinterpret_u64_f32(
12006 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <1 x i64>
12007 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_f32(float32x2_t a)12008 uint64x1_t test_vreinterpret_u64_f32(float32x2_t a) {
12009   return vreinterpret_u64_f32(a);
12010 }
12011 
12012 // CHECK-LABEL: @test_vreinterpret_u64_p8(
12013 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <1 x i64>
12014 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_p8(poly8x8_t a)12015 uint64x1_t test_vreinterpret_u64_p8(poly8x8_t a) {
12016   return vreinterpret_u64_p8(a);
12017 }
12018 
12019 // CHECK-LABEL: @test_vreinterpret_u64_p16(
12020 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <1 x i64>
12021 // CHECK:   ret <1 x i64> [[TMP0]]
test_vreinterpret_u64_p16(poly16x4_t a)12022 uint64x1_t test_vreinterpret_u64_p16(poly16x4_t a) {
12023   return vreinterpret_u64_p16(a);
12024 }
12025 
12026 // CHECK-LABEL: @test_vreinterpret_f16_s8(
12027 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
12028 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s8(int8x8_t a)12029 float16x4_t test_vreinterpret_f16_s8(int8x8_t a) {
12030   return vreinterpret_f16_s8(a);
12031 }
12032 
12033 // CHECK-LABEL: @test_vreinterpret_f16_s16(
12034 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
12035 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s16(int16x4_t a)12036 float16x4_t test_vreinterpret_f16_s16(int16x4_t a) {
12037   return vreinterpret_f16_s16(a);
12038 }
12039 
12040 // CHECK-LABEL: @test_vreinterpret_f16_s32(
12041 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
12042 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s32(int32x2_t a)12043 float16x4_t test_vreinterpret_f16_s32(int32x2_t a) {
12044   return vreinterpret_f16_s32(a);
12045 }
12046 
12047 // CHECK-LABEL: @test_vreinterpret_f16_s64(
12048 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
12049 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_s64(int64x1_t a)12050 float16x4_t test_vreinterpret_f16_s64(int64x1_t a) {
12051   return vreinterpret_f16_s64(a);
12052 }
12053 
12054 // CHECK-LABEL: @test_vreinterpret_f16_u8(
12055 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
12056 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u8(uint8x8_t a)12057 float16x4_t test_vreinterpret_f16_u8(uint8x8_t a) {
12058   return vreinterpret_f16_u8(a);
12059 }
12060 
12061 // CHECK-LABEL: @test_vreinterpret_f16_u16(
12062 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
12063 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u16(uint16x4_t a)12064 float16x4_t test_vreinterpret_f16_u16(uint16x4_t a) {
12065   return vreinterpret_f16_u16(a);
12066 }
12067 
12068 // CHECK-LABEL: @test_vreinterpret_f16_u32(
12069 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x half>
12070 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u32(uint32x2_t a)12071 float16x4_t test_vreinterpret_f16_u32(uint32x2_t a) {
12072   return vreinterpret_f16_u32(a);
12073 }
12074 
12075 // CHECK-LABEL: @test_vreinterpret_f16_u64(
12076 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x half>
12077 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_u64(uint64x1_t a)12078 float16x4_t test_vreinterpret_f16_u64(uint64x1_t a) {
12079   return vreinterpret_f16_u64(a);
12080 }
12081 
12082 // CHECK-LABEL: @test_vreinterpret_f16_f32(
12083 // CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x half>
12084 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_f32(float32x2_t a)12085 float16x4_t test_vreinterpret_f16_f32(float32x2_t a) {
12086   return vreinterpret_f16_f32(a);
12087 }
12088 
12089 // CHECK-LABEL: @test_vreinterpret_f16_p8(
12090 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x half>
12091 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_p8(poly8x8_t a)12092 float16x4_t test_vreinterpret_f16_p8(poly8x8_t a) {
12093   return vreinterpret_f16_p8(a);
12094 }
12095 
12096 // CHECK-LABEL: @test_vreinterpret_f16_p16(
12097 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <4 x half>
12098 // CHECK:   ret <4 x half> [[TMP0]]
test_vreinterpret_f16_p16(poly16x4_t a)12099 float16x4_t test_vreinterpret_f16_p16(poly16x4_t a) {
12100   return vreinterpret_f16_p16(a);
12101 }
12102 
12103 // CHECK-LABEL: @test_vreinterpret_f32_s8(
12104 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
12105 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s8(int8x8_t a)12106 float32x2_t test_vreinterpret_f32_s8(int8x8_t a) {
12107   return vreinterpret_f32_s8(a);
12108 }
12109 
12110 // CHECK-LABEL: @test_vreinterpret_f32_s16(
12111 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
12112 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s16(int16x4_t a)12113 float32x2_t test_vreinterpret_f32_s16(int16x4_t a) {
12114   return vreinterpret_f32_s16(a);
12115 }
12116 
12117 // CHECK-LABEL: @test_vreinterpret_f32_s32(
12118 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
12119 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s32(int32x2_t a)12120 float32x2_t test_vreinterpret_f32_s32(int32x2_t a) {
12121   return vreinterpret_f32_s32(a);
12122 }
12123 
12124 // CHECK-LABEL: @test_vreinterpret_f32_s64(
12125 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
12126 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_s64(int64x1_t a)12127 float32x2_t test_vreinterpret_f32_s64(int64x1_t a) {
12128   return vreinterpret_f32_s64(a);
12129 }
12130 
12131 // CHECK-LABEL: @test_vreinterpret_f32_u8(
12132 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
12133 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u8(uint8x8_t a)12134 float32x2_t test_vreinterpret_f32_u8(uint8x8_t a) {
12135   return vreinterpret_f32_u8(a);
12136 }
12137 
12138 // CHECK-LABEL: @test_vreinterpret_f32_u16(
12139 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
12140 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u16(uint16x4_t a)12141 float32x2_t test_vreinterpret_f32_u16(uint16x4_t a) {
12142   return vreinterpret_f32_u16(a);
12143 }
12144 
12145 // CHECK-LABEL: @test_vreinterpret_f32_u32(
12146 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <2 x float>
12147 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u32(uint32x2_t a)12148 float32x2_t test_vreinterpret_f32_u32(uint32x2_t a) {
12149   return vreinterpret_f32_u32(a);
12150 }
12151 
12152 // CHECK-LABEL: @test_vreinterpret_f32_u64(
12153 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <2 x float>
12154 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_u64(uint64x1_t a)12155 float32x2_t test_vreinterpret_f32_u64(uint64x1_t a) {
12156   return vreinterpret_f32_u64(a);
12157 }
12158 
12159 // CHECK-LABEL: @test_vreinterpret_f32_f16(
12160 // CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <2 x float>
12161 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_f16(float16x4_t a)12162 float32x2_t test_vreinterpret_f32_f16(float16x4_t a) {
12163   return vreinterpret_f32_f16(a);
12164 }
12165 
12166 // CHECK-LABEL: @test_vreinterpret_f32_p8(
12167 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <2 x float>
12168 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_p8(poly8x8_t a)12169 float32x2_t test_vreinterpret_f32_p8(poly8x8_t a) {
12170   return vreinterpret_f32_p8(a);
12171 }
12172 
12173 // CHECK-LABEL: @test_vreinterpret_f32_p16(
12174 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <2 x float>
12175 // CHECK:   ret <2 x float> [[TMP0]]
test_vreinterpret_f32_p16(poly16x4_t a)12176 float32x2_t test_vreinterpret_f32_p16(poly16x4_t a) {
12177   return vreinterpret_f32_p16(a);
12178 }
12179 
12180 // CHECK-LABEL: @test_vreinterpret_p8_s8(
12181 // CHECK:   ret <8 x i8> %a
test_vreinterpret_p8_s8(int8x8_t a)12182 poly8x8_t test_vreinterpret_p8_s8(int8x8_t a) {
12183   return vreinterpret_p8_s8(a);
12184 }
12185 
12186 // CHECK-LABEL: @test_vreinterpret_p8_s16(
12187 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
12188 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_s16(int16x4_t a)12189 poly8x8_t test_vreinterpret_p8_s16(int16x4_t a) {
12190   return vreinterpret_p8_s16(a);
12191 }
12192 
12193 // CHECK-LABEL: @test_vreinterpret_p8_s32(
12194 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
12195 // CHECK:   ret <8 x i8> [[TMP0]]
test_vreinterpret_p8_s32(int32x2_t a)12196 poly8x8_t test_vreinterpret_p8_s32(int32x2_t a) {
12197   return vreinterpret_p8_s32(a);
12198 }
12199 
12200 // CHECK-LABEL: @test_vreinterpret_p8_s64(
12201 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_s64(int64x1_t a) {
  return vreinterpret_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u8(
// CHECK:   ret <8 x i8> %a
poly8x8_t test_vreinterpret_p8_u8(uint8x8_t a) {
  return vreinterpret_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u16(uint16x4_t a) {
  return vreinterpret_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u32(uint32x2_t a) {
  return vreinterpret_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_u64(uint64x1_t a) {
  return vreinterpret_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f16(float16x4_t a) {
  return vreinterpret_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_f32(float32x2_t a) {
  return vreinterpret_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   ret <8 x i8> [[TMP0]]
poly8x8_t test_vreinterpret_p8_p16(poly16x4_t a) {
  return vreinterpret_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s8(int8x8_t a) {
  return vreinterpret_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_s16(int16x4_t a) {
  return vreinterpret_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s32(int32x2_t a) {
  return vreinterpret_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_s64(int64x1_t a) {
  return vreinterpret_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u8(uint8x8_t a) {
  return vreinterpret_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u16(
// CHECK:   ret <4 x i16> %a
poly16x4_t test_vreinterpret_p16_u16(uint16x4_t a) {
  return vreinterpret_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u32(uint32x2_t a) {
  return vreinterpret_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_u64(uint64x1_t a) {
  return vreinterpret_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f16(float16x4_t a) {
  return vreinterpret_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_f32(float32x2_t a) {
  return vreinterpret_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpret_p16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i8> %a to <4 x i16>
// CHECK:   ret <4 x i16> [[TMP0]]
poly16x4_t test_vreinterpret_p16_p8(poly8x8_t a) {
  return vreinterpret_p16_p8(a);
}

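// The q-register vreinterpret tests below cover the 128-bit forms. A
// vreinterpret is a pure bitwise reinterpretation, so the expected IR is at
// most a single bitcast; when source and destination map to the same LLVM
// type (signed, unsigned, and polynomial views of one lane layout), the
// argument is returned unchanged.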
// CHECK-LABEL: @test_vreinterpretq_s8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s16(int16x8_t a) {
  return vreinterpretq_s8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s32(int32x4_t a) {
  return vreinterpretq_s8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_s64(int64x2_t a) {
  return vreinterpretq_s8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u8(
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_u8(uint8x16_t a) {
  return vreinterpretq_s8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u16(uint16x8_t a) {
  return vreinterpretq_s8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u32(uint32x4_t a) {
  return vreinterpretq_s8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_u64(uint64x2_t a) {
  return vreinterpretq_s8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f16(float16x8_t a) {
  return vreinterpretq_s8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_f32(float32x4_t a) {
  return vreinterpretq_s8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p8(
// CHECK:   ret <16 x i8> %a
int8x16_t test_vreinterpretq_s8_p8(poly8x16_t a) {
  return vreinterpretq_s8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vreinterpretq_s8_p16(poly16x8_t a) {
  return vreinterpretq_s8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s8(int8x16_t a) {
  return vreinterpretq_s16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s32(int32x4_t a) {
  return vreinterpretq_s16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_s64(int64x2_t a) {
  return vreinterpretq_s16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u8(uint8x16_t a) {
  return vreinterpretq_s16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u16(
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_u16(uint16x8_t a) {
  return vreinterpretq_s16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u32(uint32x4_t a) {
  return vreinterpretq_s16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_u64(uint64x2_t a) {
  return vreinterpretq_s16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f16(float16x8_t a) {
  return vreinterpretq_s16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_f32(float32x4_t a) {
  return vreinterpretq_s16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
int16x8_t test_vreinterpretq_s16_p8(poly8x16_t a) {
  return vreinterpretq_s16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s16_p16(
// CHECK:   ret <8 x i16> %a
int16x8_t test_vreinterpretq_s16_p16(poly16x8_t a) {
  return vreinterpretq_s16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s8(int8x16_t a) {
  return vreinterpretq_s32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s16(int16x8_t a) {
  return vreinterpretq_s32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_s64(int64x2_t a) {
  return vreinterpretq_s32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u8(uint8x16_t a) {
  return vreinterpretq_s32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u16(uint16x8_t a) {
  return vreinterpretq_s32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u32(
// CHECK:   ret <4 x i32> %a
int32x4_t test_vreinterpretq_s32_u32(uint32x4_t a) {
  return vreinterpretq_s32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_u64(uint64x2_t a) {
  return vreinterpretq_s32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f16(float16x8_t a) {
  return vreinterpretq_s32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_f32(float32x4_t a) {
  return vreinterpretq_s32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p8(poly8x16_t a) {
  return vreinterpretq_s32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
int32x4_t test_vreinterpretq_s32_p16(poly16x8_t a) {
  return vreinterpretq_s32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s8(int8x16_t a) {
  return vreinterpretq_s64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s16(int16x8_t a) {
  return vreinterpretq_s64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_s32(int32x4_t a) {
  return vreinterpretq_s64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u8(uint8x16_t a) {
  return vreinterpretq_s64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u16(uint16x8_t a) {
  return vreinterpretq_s64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_u32(uint32x4_t a) {
  return vreinterpretq_s64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_u64(
// CHECK:   ret <2 x i64> %a
int64x2_t test_vreinterpretq_s64_u64(uint64x2_t a) {
  return vreinterpretq_s64_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f16(float16x8_t a) {
  return vreinterpretq_s64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_f32(float32x4_t a) {
  return vreinterpretq_s64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p8(poly8x16_t a) {
  return vreinterpretq_s64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_s64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
int64x2_t test_vreinterpretq_s64_p16(poly16x8_t a) {
  return vreinterpretq_s64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s8(
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_s8(int8x16_t a) {
  return vreinterpretq_u8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s16(int16x8_t a) {
  return vreinterpretq_u8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s32(int32x4_t a) {
  return vreinterpretq_u8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_s64(int64x2_t a) {
  return vreinterpretq_u8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u16(uint16x8_t a) {
  return vreinterpretq_u8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u32(uint32x4_t a) {
  return vreinterpretq_u8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_u64(uint64x2_t a) {
  return vreinterpretq_u8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f16(float16x8_t a) {
  return vreinterpretq_u8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_f32(float32x4_t a) {
  return vreinterpretq_u8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p8(
// CHECK:   ret <16 x i8> %a
uint8x16_t test_vreinterpretq_u8_p8(poly8x16_t a) {
  return vreinterpretq_u8_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vreinterpretq_u8_p16(poly16x8_t a) {
  return vreinterpretq_u8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s8(int8x16_t a) {
  return vreinterpretq_u16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s16(
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_s16(int16x8_t a) {
  return vreinterpretq_u16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s32(int32x4_t a) {
  return vreinterpretq_u16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_s64(int64x2_t a) {
  return vreinterpretq_u16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u8(uint8x16_t a) {
  return vreinterpretq_u16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u32(uint32x4_t a) {
  return vreinterpretq_u16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_u64(uint64x2_t a) {
  return vreinterpretq_u16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f16(float16x8_t a) {
  return vreinterpretq_u16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_f32(float32x4_t a) {
  return vreinterpretq_u16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
uint16x8_t test_vreinterpretq_u16_p8(poly8x16_t a) {
  return vreinterpretq_u16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u16_p16(
// CHECK:   ret <8 x i16> %a
uint16x8_t test_vreinterpretq_u16_p16(poly16x8_t a) {
  return vreinterpretq_u16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s8(int8x16_t a) {
  return vreinterpretq_u32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s16(int16x8_t a) {
  return vreinterpretq_u32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s32(
// CHECK:   ret <4 x i32> %a
uint32x4_t test_vreinterpretq_u32_s32(int32x4_t a) {
  return vreinterpretq_u32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_s64(int64x2_t a) {
  return vreinterpretq_u32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u8(uint8x16_t a) {
  return vreinterpretq_u32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u16(uint16x8_t a) {
  return vreinterpretq_u32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_u64(uint64x2_t a) {
  return vreinterpretq_u32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f16(float16x8_t a) {
  return vreinterpretq_u32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_f32(float32x4_t a) {
  return vreinterpretq_u32_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p8(poly8x16_t a) {
  return vreinterpretq_u32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x i32>
// CHECK:   ret <4 x i32> [[TMP0]]
uint32x4_t test_vreinterpretq_u32_p16(poly16x8_t a) {
  return vreinterpretq_u32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s8(int8x16_t a) {
  return vreinterpretq_u64_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s16(int16x8_t a) {
  return vreinterpretq_u64_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_s32(int32x4_t a) {
  return vreinterpretq_u64_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_s64(
// CHECK:   ret <2 x i64> %a
uint64x2_t test_vreinterpretq_u64_s64(int64x2_t a) {
  return vreinterpretq_u64_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u8(uint8x16_t a) {
  return vreinterpretq_u64_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u16(uint16x8_t a) {
  return vreinterpretq_u64_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_u32(uint32x4_t a) {
  return vreinterpretq_u64_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f16(float16x8_t a) {
  return vreinterpretq_u64_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_f32(float32x4_t a) {
  return vreinterpretq_u64_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p8(poly8x16_t a) {
  return vreinterpretq_u64_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_u64_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <2 x i64>
// CHECK:   ret <2 x i64> [[TMP0]]
uint64x2_t test_vreinterpretq_u64_p16(poly16x8_t a) {
  return vreinterpretq_u64_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s8(int8x16_t a) {
  return vreinterpretq_f16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s16(int16x8_t a) {
  return vreinterpretq_f16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s32(int32x4_t a) {
  return vreinterpretq_f16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_s64(int64x2_t a) {
  return vreinterpretq_f16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u8(uint8x16_t a) {
  return vreinterpretq_f16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u16(uint16x8_t a) {
  return vreinterpretq_f16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u32(uint32x4_t a) {
  return vreinterpretq_f16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_u64(uint64x2_t a) {
  return vreinterpretq_f16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_f32(float32x4_t a) {
  return vreinterpretq_f16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p8(poly8x16_t a) {
  return vreinterpretq_f16_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f16_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <8 x half>
// CHECK:   ret <8 x half> [[TMP0]]
float16x8_t test_vreinterpretq_f16_p16(poly16x8_t a) {
  return vreinterpretq_f16_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s8(int8x16_t a) {
  return vreinterpretq_f32_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s16(int16x8_t a) {
  return vreinterpretq_f32_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s32(int32x4_t a) {
  return vreinterpretq_f32_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_s64(int64x2_t a) {
  return vreinterpretq_f32_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u8(uint8x16_t a) {
  return vreinterpretq_f32_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u16(uint16x8_t a) {
  return vreinterpretq_f32_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u32(uint32x4_t a) {
  return vreinterpretq_f32_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_u64(uint64x2_t a) {
  return vreinterpretq_f32_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_f16(float16x8_t a) {
  return vreinterpretq_f32_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p8(poly8x16_t a) {
  return vreinterpretq_f32_p8(a);
}

// CHECK-LABEL: @test_vreinterpretq_f32_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <4 x float>
// CHECK:   ret <4 x float> [[TMP0]]
float32x4_t test_vreinterpretq_f32_p16(poly16x8_t a) {
  return vreinterpretq_f32_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s8(
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_s8(int8x16_t a) {
  return vreinterpretq_p8_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s16(int16x8_t a) {
  return vreinterpretq_p8_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s32(int32x4_t a) {
  return vreinterpretq_p8_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_s64(int64x2_t a) {
  return vreinterpretq_p8_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u8(
// CHECK:   ret <16 x i8> %a
poly8x16_t test_vreinterpretq_p8_u8(uint8x16_t a) {
  return vreinterpretq_p8_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u16(uint16x8_t a) {
  return vreinterpretq_p8_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u32(uint32x4_t a) {
  return vreinterpretq_p8_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_u64(uint64x2_t a) {
  return vreinterpretq_p8_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f16(float16x8_t a) {
  return vreinterpretq_p8_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_f32(float32x4_t a) {
  return vreinterpretq_p8_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p8_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   ret <16 x i8> [[TMP0]]
poly8x16_t test_vreinterpretq_p8_p16(poly16x8_t a) {
  return vreinterpretq_p8_p16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s8(int8x16_t a) {
  return vreinterpretq_p16_s8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s16(
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_s16(int16x8_t a) {
  return vreinterpretq_p16_s16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s32(int32x4_t a) {
  return vreinterpretq_p16_s32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_s64(int64x2_t a) {
  return vreinterpretq_p16_s64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u8(uint8x16_t a) {
  return vreinterpretq_p16_u8(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u16(
// CHECK:   ret <8 x i16> %a
poly16x8_t test_vreinterpretq_p16_u16(uint16x8_t a) {
  return vreinterpretq_p16_u16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u32(uint32x4_t a) {
  return vreinterpretq_p16_u32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_u64(uint64x2_t a) {
  return vreinterpretq_p16_u64(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f16(float16x8_t a) {
  return vreinterpretq_p16_f16(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_f32(float32x4_t a) {
  return vreinterpretq_p16_f32(a);
}

// CHECK-LABEL: @test_vreinterpretq_p16_p8(
// CHECK:   [[TMP0:%.*]] = bitcast <16 x i8> %a to <8 x i16>
// CHECK:   ret <8 x i16> [[TMP0]]
poly16x8_t test_vreinterpretq_p16_p8(poly8x16_t a) {
  return vreinterpretq_p16_p8(a);
}

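// vrev16 reverses the byte order within each 16-bit halfword. Clang lowers
// it directly to a shufflevector whose constant mask swaps every even/odd
// byte pair, so no target intrinsic call is expected in these tests.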
// CHECK-LABEL: @test_vrev16_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev16_s8(int8x8_t a) {
  return vrev16_s8(a);
}

// CHECK-LABEL: @test_vrev16_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev16_u8(uint8x8_t a) {
  return vrev16_u8(a);
}

// CHECK-LABEL: @test_vrev16_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev16_p8(poly8x8_t a) {
  return vrev16_p8(a);
}

// CHECK-LABEL: @test_vrev16q_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev16q_s8(int8x16_t a) {
  return vrev16q_s8(a);
}

// CHECK-LABEL: @test_vrev16q_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev16q_u8(uint8x16_t a) {
  return vrev16q_u8(a);
}

// CHECK-LABEL: @test_vrev16q_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev16q_p8(poly8x16_t a) {
  return vrev16q_p8(a);
}

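// vrev32 reverses the elements (bytes or halfwords) within each 32-bit
// word; the shuffle masks below encode that per-word reversal.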
// CHECK-LABEL: @test_vrev32_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev32_s8(int8x8_t a) {
  return vrev32_s8(a);
}

// CHECK-LABEL: @test_vrev32_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev32_s16(int16x4_t a) {
  return vrev32_s16(a);
}

// CHECK-LABEL: @test_vrev32_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev32_u8(uint8x8_t a) {
  return vrev32_u8(a);
}

// CHECK-LABEL: @test_vrev32_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev32_u16(uint16x4_t a) {
  return vrev32_u16(a);
}

// CHECK-LABEL: @test_vrev32_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev32_p8(poly8x8_t a) {
  return vrev32_p8(a);
}

// CHECK-LABEL: @test_vrev32_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev32_p16(poly16x4_t a) {
  return vrev32_p16(a);
}

// CHECK-LABEL: @test_vrev32q_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev32q_s8(int8x16_t a) {
  return vrev32q_s8(a);
}

// CHECK-LABEL: @test_vrev32q_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev32q_s16(int16x8_t a) {
  return vrev32q_s16(a);
}

// CHECK-LABEL: @test_vrev32q_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev32q_u8(uint8x16_t a) {
  return vrev32q_u8(a);
}

// CHECK-LABEL: @test_vrev32q_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev32q_u16(uint16x8_t a) {
  return vrev32q_u16(a);
}

// CHECK-LABEL: @test_vrev32q_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev32q_p8(poly8x16_t a) {
  return vrev32q_p8(a);
}

// CHECK-LABEL: @test_vrev32q_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev32q_p16(poly16x8_t a) {
  return vrev32q_p16(a);
}

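// vrev64 reverses the elements within each 64-bit doubleword, so the
// 64-bit d-register forms reverse the entire vector while the q-register
// forms reverse each 64-bit half independently.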
// CHECK-LABEL: @test_vrev64_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
int8x8_t test_vrev64_s8(int8x8_t a) {
  return vrev64_s8(a);
}

// CHECK-LABEL: @test_vrev64_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
int16x4_t test_vrev64_s16(int16x4_t a) {
  return vrev64_s16(a);
}

// CHECK-LABEL: @test_vrev64_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
int32x2_t test_vrev64_s32(int32x2_t a) {
  return vrev64_s32(a);
}

// CHECK-LABEL: @test_vrev64_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
uint8x8_t test_vrev64_u8(uint8x8_t a) {
  return vrev64_u8(a);
}

// CHECK-LABEL: @test_vrev64_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
uint16x4_t test_vrev64_u16(uint16x4_t a) {
  return vrev64_u16(a);
}

// CHECK-LABEL: @test_vrev64_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %a, <2 x i32> <i32 1, i32 0>
// CHECK:   ret <2 x i32> [[SHUFFLE_I]]
uint32x2_t test_vrev64_u32(uint32x2_t a) {
  return vrev64_u32(a);
}

// CHECK-LABEL: @test_vrev64_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %a, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <8 x i8> [[SHUFFLE_I]]
poly8x8_t test_vrev64_p8(poly8x8_t a) {
  return vrev64_p8(a);
}

// CHECK-LABEL: @test_vrev64_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %a, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK:   ret <4 x i16> [[SHUFFLE_I]]
poly16x4_t test_vrev64_p16(poly16x4_t a) {
  return vrev64_p16(a);
}

// CHECK-LABEL: @test_vrev64_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %a, <2 x i32> <i32 1, i32 0>
// CHECK:   ret <2 x float> [[SHUFFLE_I]]
float32x2_t test_vrev64_f32(float32x2_t a) {
  return vrev64_f32(a);
}

// CHECK-LABEL: @test_vrev64q_s8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
int8x16_t test_vrev64q_s8(int8x16_t a) {
  return vrev64q_s8(a);
}

// CHECK-LABEL: @test_vrev64q_s16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
int16x8_t test_vrev64q_s16(int16x8_t a) {
  return vrev64q_s16(a);
}

// CHECK-LABEL: @test_vrev64q_s32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
int32x4_t test_vrev64q_s32(int32x4_t a) {
  return vrev64q_s32(a);
}

// CHECK-LABEL: @test_vrev64q_u8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
uint8x16_t test_vrev64q_u8(uint8x16_t a) {
  return vrev64q_u8(a);
}

// CHECK-LABEL: @test_vrev64q_u16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
uint16x8_t test_vrev64q_u16(uint16x8_t a) {
  return vrev64q_u16(a);
}

// CHECK-LABEL: @test_vrev64q_u32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x i32> [[SHUFFLE_I]]
uint32x4_t test_vrev64q_u32(uint32x4_t a) {
  return vrev64q_u32(a);
}

// CHECK-LABEL: @test_vrev64q_p8(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %a, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
// CHECK:   ret <16 x i8> [[SHUFFLE_I]]
poly8x16_t test_vrev64q_p8(poly8x16_t a) {
  return vrev64q_p8(a);
}

// CHECK-LABEL: @test_vrev64q_p16(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %a, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK:   ret <8 x i16> [[SHUFFLE_I]]
poly16x8_t test_vrev64q_p16(poly16x8_t a) {
  return vrev64q_p16(a);
}

// CHECK-LABEL: @test_vrev64q_f32(
// CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %a, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK:   ret <4 x float> [[SHUFFLE_I]]
float32x4_t test_vrev64q_f32(float32x4_t a) {
  return vrev64q_f32(a);
}

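// vrhadd is the rounding halving add: conceptually each lane computes
// (a + b + 1) >> 1 without losing the intermediate carry. Since that is not
// a plain add/shift on the element type, the expected IR calls the
// @llvm.arm.neon.vrhadds (signed) / @llvm.arm.neon.vrhaddu (unsigned)
// intrinsics; the surrounding bitcasts come from the generic NEON builtin
// lowering and are not consumed by the call itself.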
// CHECK-LABEL: @test_vrhadd_s8(
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
int8x8_t test_vrhadd_s8(int8x8_t a, int8x8_t b) {
  return vrhadd_s8(a, b);
}

// CHECK-LABEL: @test_vrhadd_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
int16x4_t test_vrhadd_s16(int16x4_t a, int16x4_t b) {
  return vrhadd_s16(a, b);
}

// CHECK-LABEL: @test_vrhadd_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhadds.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
int32x2_t test_vrhadd_s32(int32x2_t a, int32x2_t b) {
  return vrhadd_s32(a, b);
}

// CHECK-LABEL: @test_vrhadd_u8(
// CHECK:   [[VRHADD_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRHADD_V_I]]
uint8x8_t test_vrhadd_u8(uint8x8_t a, uint8x8_t b) {
  return vrhadd_u8(a, b);
}

// CHECK-LABEL: @test_vrhadd_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <4 x i16> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRHADD_V2_I]]
uint16x4_t test_vrhadd_u16(uint16x4_t a, uint16x4_t b) {
  return vrhadd_u16(a, b);
}

// CHECK-LABEL: @test_vrhadd_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRHADD_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrhaddu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRHADD_V3_I:%.*]] = bitcast <2 x i32> [[VRHADD_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRHADD_V2_I]]
uint32x2_t test_vrhadd_u32(uint32x2_t a, uint32x2_t b) {
  return vrhadd_u32(a, b);
}

13544 // CHECK-LABEL: @test_vrhaddq_s8(
13545 // CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %a, <16 x i8> %b)
13546 // CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_s8(int8x16_t a,int8x16_t b)13547 int8x16_t test_vrhaddq_s8(int8x16_t a, int8x16_t b) {
13548   return vrhaddq_s8(a, b);
13549 }
13550 
13551 // CHECK-LABEL: @test_vrhaddq_s16(
13552 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13553 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13554 // CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a, <8 x i16> %b)
13555 // CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
13556 // CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
test_vrhaddq_s16(int16x8_t a,int16x8_t b)13557 int16x8_t test_vrhaddq_s16(int16x8_t a, int16x8_t b) {
13558   return vrhaddq_s16(a, b);
13559 }
13560 
13561 // CHECK-LABEL: @test_vrhaddq_s32(
13562 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13563 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13564 // CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhadds.v4i32(<4 x i32> %a, <4 x i32> %b)
13565 // CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
13566 // CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
test_vrhaddq_s32(int32x4_t a,int32x4_t b)13567 int32x4_t test_vrhaddq_s32(int32x4_t a, int32x4_t b) {
13568   return vrhaddq_s32(a, b);
13569 }
13570 
13571 // CHECK-LABEL: @test_vrhaddq_u8(
13572 // CHECK:   [[VRHADDQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %a, <16 x i8> %b)
13573 // CHECK:   ret <16 x i8> [[VRHADDQ_V_I]]
test_vrhaddq_u8(uint8x16_t a,uint8x16_t b)13574 uint8x16_t test_vrhaddq_u8(uint8x16_t a, uint8x16_t b) {
13575   return vrhaddq_u8(a, b);
13576 }
13577 
13578 // CHECK-LABEL: @test_vrhaddq_u16(
13579 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
13580 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
13581 // CHECK:   [[VRHADDQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a, <8 x i16> %b)
13582 // CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <8 x i16> [[VRHADDQ_V2_I]] to <16 x i8>
13583 // CHECK:   ret <8 x i16> [[VRHADDQ_V2_I]]
test_vrhaddq_u16(uint16x8_t a,uint16x8_t b)13584 uint16x8_t test_vrhaddq_u16(uint16x8_t a, uint16x8_t b) {
13585   return vrhaddq_u16(a, b);
13586 }
13587 
13588 // CHECK-LABEL: @test_vrhaddq_u32(
13589 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
13590 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
13591 // CHECK:   [[VRHADDQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrhaddu.v4i32(<4 x i32> %a, <4 x i32> %b)
13592 // CHECK:   [[VRHADDQ_V3_I:%.*]] = bitcast <4 x i32> [[VRHADDQ_V2_I]] to <16 x i8>
13593 // CHECK:   ret <4 x i32> [[VRHADDQ_V2_I]]
test_vrhaddq_u32(uint32x4_t a,uint32x4_t b)13594 uint32x4_t test_vrhaddq_u32(uint32x4_t a, uint32x4_t b) {
13595   return vrhaddq_u32(a, b);
13596 }
13597 
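// vrshl/vrshlq: lane-wise shift by a signed per-lane count, lowered to
// llvm.arm.neon.vrshift{s,u}. Sketch of the intended semantics (assumed,
// not checked here): a positive b[i] shifts left; a negative b[i] shifts
// right by -b[i] with rounding, roughly (a[i] + (1 << (-b[i] - 1))) >> -b[i].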
// CHECK-LABEL: @test_vrshl_s8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
int8x8_t test_vrshl_s8(int8x8_t a, int8x8_t b) {
  return vrshl_s8(a, b);
}

// CHECK-LABEL: @test_vrshl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
int16x4_t test_vrshl_s16(int16x4_t a, int16x4_t b) {
  return vrshl_s16(a, b);
}

// CHECK-LABEL: @test_vrshl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
int32x2_t test_vrshl_s32(int32x2_t a, int32x2_t b) {
  return vrshl_s32(a, b);
}

// CHECK-LABEL: @test_vrshl_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
int64x1_t test_vrshl_s64(int64x1_t a, int64x1_t b) {
  return vrshl_s64(a, b);
}

// CHECK-LABEL: @test_vrshl_u8(
// CHECK:   [[VRSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VRSHL_V_I]]
uint8x8_t test_vrshl_u8(uint8x8_t a, int8x8_t b) {
  return vrshl_u8(a, b);
}

// CHECK-LABEL: @test_vrshl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <4 x i16> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSHL_V2_I]]
uint16x4_t test_vrshl_u16(uint16x4_t a, int16x4_t b) {
  return vrshl_u16(a, b);
}

// CHECK-LABEL: @test_vrshl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <2 x i32> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSHL_V2_I]]
uint32x2_t test_vrshl_u32(uint32x2_t a, int32x2_t b) {
  return vrshl_u32(a, b);
}

// CHECK-LABEL: @test_vrshl_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VRSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
// CHECK:   [[VRSHL_V3_I:%.*]] = bitcast <1 x i64> [[VRSHL_V2_I]] to <8 x i8>
// CHECK:   ret <1 x i64> [[VRSHL_V2_I]]
uint64x1_t test_vrshl_u64(uint64x1_t a, int64x1_t b) {
  return vrshl_u64(a, b);
}

// CHECK-LABEL: @test_vrshlq_s8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
int8x16_t test_vrshlq_s8(int8x16_t a, int8x16_t b) {
  return vrshlq_s8(a, b);
}

// CHECK-LABEL: @test_vrshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
int16x8_t test_vrshlq_s16(int16x8_t a, int16x8_t b) {
  return vrshlq_s16(a, b);
}

// CHECK-LABEL: @test_vrshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
int32x4_t test_vrshlq_s32(int32x4_t a, int32x4_t b) {
  return vrshlq_s32(a, b);
}

// CHECK-LABEL: @test_vrshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
int64x2_t test_vrshlq_s64(int64x2_t a, int64x2_t b) {
  return vrshlq_s64(a, b);
}

// CHECK-LABEL: @test_vrshlq_u8(
// CHECK:   [[VRSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VRSHLQ_V_I]]
uint8x16_t test_vrshlq_u8(uint8x16_t a, int8x16_t b) {
  return vrshlq_u8(a, b);
}

// CHECK-LABEL: @test_vrshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VRSHLQ_V2_I]]
uint16x8_t test_vrshlq_u16(uint16x8_t a, int16x8_t b) {
  return vrshlq_u16(a, b);
}

// CHECK-LABEL: @test_vrshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VRSHLQ_V2_I]]
uint32x4_t test_vrshlq_u32(uint32x4_t a, int32x4_t b) {
  return vrshlq_u32(a, b);
}

// CHECK-LABEL: @test_vrshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VRSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VRSHLQ_V2_I]]
uint64x2_t test_vrshlq_u64(uint64x2_t a, int64x2_t b) {
  return vrshlq_u64(a, b);
}

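// vrshrn_n: rounding shift right by an immediate, then narrow to half
// width. Codegen reuses llvm.arm.neon.vrshiftn with a constant splat of
// -n as the shift operand; the all--1 vectors below correspond to n == 1.
// Scalar sketch (illustrative): r[i] = (half_T)((a[i] + (1 << (n-1))) >> n).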
// CHECK-LABEL: @test_vrshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
int8x8_t test_vrshrn_n_s16(int16x8_t a) {
  return vrshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
int16x4_t test_vrshrn_n_s32(int32x4_t a) {
  return vrshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
int32x2_t test_vrshrn_n_s64(int64x2_t a) {
  return vrshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHRN_N1:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftn.v8i8(<8 x i16> [[VRSHRN_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i8> [[VRSHRN_N1]]
uint8x8_t test_vrshrn_n_u16(uint16x8_t a) {
  return vrshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHRN_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftn.v4i16(<4 x i32> [[VRSHRN_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i16> [[VRSHRN_N1]]
uint16x4_t test_vrshrn_n_u32(uint32x4_t a) {
  return vrshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHRN_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHRN_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftn.v2i32(<2 x i64> [[VRSHRN_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i32> [[VRSHRN_N1]]
uint32x2_t test_vrshrn_n_u64(uint64x2_t a) {
  return vrshrn_n_u64(a, 1);
}

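// vrshr_n/vrshrq_n: rounding shift right by an immediate. No dedicated IR
// intrinsic is used; codegen emits llvm.arm.neon.vrshift{s,u} with a splat
// of -n (all -1 below, since every test here shifts by 1).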
// CHECK-LABEL: @test_vrshr_n_s8(
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VRSHR_N]]
int8x8_t test_vrshr_n_s8(int8x8_t a) {
  return vrshr_n_s8(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VRSHR_N1]]
int16x4_t test_vrshr_n_s16(int16x4_t a) {
  return vrshr_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   ret <2 x i32> [[VRSHR_N1]]
int32x2_t test_vrshr_n_s32(int32x2_t a) {
  return vrshr_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
int64x1_t test_vrshr_n_s64(int64x1_t a) {
  return vrshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u8(
// CHECK:   [[VRSHR_N:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %a, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VRSHR_N]]
uint8x8_t test_vrshr_n_u8(uint8x8_t a) {
  return vrshr_n_u8(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[VRSHR_N]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VRSHR_N1]]
uint16x4_t test_vrshr_n_u16(uint16x4_t a) {
  return vrshr_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[VRSHR_N]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   ret <2 x i32> [[VRSHR_N1]]
uint32x2_t test_vrshr_n_u32(uint32x2_t a) {
  return vrshr_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshr_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[VRSHR_N]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VRSHR_N1]]
uint64x1_t test_vrshr_n_u64(uint64x1_t a) {
  return vrshr_n_u64(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s8(
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VRSHR_N]]
int8x16_t test_vrshrq_n_s8(int8x16_t a) {
  return vrshrq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VRSHR_N1]]
int16x8_t test_vrshrq_n_s16(int16x8_t a) {
  return vrshrq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i32> [[VRSHR_N1]]
int32x4_t test_vrshrq_n_s32(int32x4_t a) {
  return vrshrq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i64> [[VRSHR_N1]]
int64x2_t test_vrshrq_n_s64(int64x2_t a) {
  return vrshrq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u8(
// CHECK:   [[VRSHR_N:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %a, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VRSHR_N]]
uint8x16_t test_vrshrq_n_u8(uint8x16_t a) {
  return vrshrq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VRSHR_N1:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[VRSHR_N]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VRSHR_N1]]
uint16x8_t test_vrshrq_n_u16(uint16x8_t a) {
  return vrshrq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VRSHR_N1:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[VRSHR_N]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i32> [[VRSHR_N1]]
uint32x4_t test_vrshrq_n_u32(uint32x4_t a) {
  return vrshrq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vrshrq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[VRSHR_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VRSHR_N1:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[VRSHR_N]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i64> [[VRSHR_N1]]
uint64x2_t test_vrshrq_n_u64(uint64x2_t a) {
  return vrshrq_n_u64(a, 1);
}

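// vrsqrte/vrsqrteq: reciprocal square-root estimate, roughly 1/sqrt(a[i])
// for the float variants (the u32 variants estimate on a fixed-point
// interpretation of the input). The leading bitcasts are dead after mem2reg.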
// CHECK-LABEL: @test_vrsqrte_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %a)
// CHECK:   ret <2 x float> [[VRSQRTE_V1_I]]
float32x2_t test_vrsqrte_f32(float32x2_t a) {
  return vrsqrte_f32(a);
}

// CHECK-LABEL: @test_vrsqrte_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VRSQRTE_V1_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32> %a)
// CHECK:   ret <2 x i32> [[VRSQRTE_V1_I]]
uint32x2_t test_vrsqrte_u32(uint32x2_t a) {
  return vrsqrte_u32(a);
}

// CHECK-LABEL: @test_vrsqrteq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %a)
// CHECK:   ret <4 x float> [[VRSQRTEQ_V1_I]]
float32x4_t test_vrsqrteq_f32(float32x4_t a) {
  return vrsqrteq_f32(a);
}

// CHECK-LABEL: @test_vrsqrteq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[VRSQRTEQ_V1_I:%.*]] = call <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32> %a)
// CHECK:   ret <4 x i32> [[VRSQRTEQ_V1_I]]
uint32x4_t test_vrsqrteq_u32(uint32x4_t a) {
  return vrsqrteq_u32(a);
}

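// vrsqrts/vrsqrtsq: reciprocal square-root step, semantically
// (3 - a[i]*b[i]) / 2, the Newton-Raphson refinement used together with
// vrsqrte. Sketch only; the test just checks the intrinsic call.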
// CHECK-LABEL: @test_vrsqrts_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[VRSQRTS_V2_I:%.*]] = call <2 x float> @llvm.arm.neon.vrsqrts.v2f32(<2 x float> %a, <2 x float> %b)
// CHECK:   [[VRSQRTS_V3_I:%.*]] = bitcast <2 x float> [[VRSQRTS_V2_I]] to <8 x i8>
// CHECK:   ret <2 x float> [[VRSQRTS_V2_I]]
float32x2_t test_vrsqrts_f32(float32x2_t a, float32x2_t b) {
  return vrsqrts_f32(a, b);
}

// CHECK-LABEL: @test_vrsqrtsq_f32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[VRSQRTSQ_V2_I:%.*]] = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %a, <4 x float> %b)
// CHECK:   [[VRSQRTSQ_V3_I:%.*]] = bitcast <4 x float> [[VRSQRTSQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x float> [[VRSQRTSQ_V2_I]]
float32x4_t test_vrsqrtsq_f32(float32x4_t a, float32x4_t b) {
  return vrsqrtsq_f32(a, b);
}

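// vrsra_n/vrsraq_n: rounding shift right by an immediate, accumulated into
// the first operand. Illustrative scalar sketch (not checked):
//   r[i] = a[i] + ((b[i] + (1 << (n-1))) >> n)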
// CHECK-LABEL: @test_vrsra_n_s8(
// CHECK:   [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshifts.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK:   ret <8 x i8> [[VRSRA_N]]
int8x8_t test_vrsra_n_s8(int8x8_t a, int8x8_t b) {
  return vrsra_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshifts.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK:   ret <4 x i16> [[VRSRA_N]]
int16x4_t test_vrsra_n_s16(int16x4_t a, int16x4_t b) {
  return vrsra_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshifts.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK:   ret <2 x i32> [[VRSRA_N]]
int32x2_t test_vrsra_n_s32(int32x2_t a, int32x2_t b) {
  return vrsra_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshifts.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK:   ret <1 x i64> [[VRSRA_N]]
int64x1_t test_vrsra_n_s64(int64x1_t a, int64x1_t b) {
  return vrsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u8(
// CHECK:   [[TMP0:%.*]] = call <8 x i8> @llvm.arm.neon.vrshiftu.v8i8(<8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <8 x i8> %a, [[TMP0]]
// CHECK:   ret <8 x i8> [[VRSRA_N]]
uint8x8_t test_vrsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vrsra_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP4:%.*]] = call <4 x i16> @llvm.arm.neon.vrshiftu.v4i16(<4 x i16> [[TMP3]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <4 x i16> [[TMP2]], [[TMP4]]
// CHECK:   ret <4 x i16> [[VRSRA_N]]
uint16x4_t test_vrsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vrsra_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP4:%.*]] = call <2 x i32> @llvm.arm.neon.vrshiftu.v2i32(<2 x i32> [[TMP3]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <2 x i32> [[TMP2]], [[TMP4]]
// CHECK:   ret <2 x i32> [[VRSRA_N]]
uint32x2_t test_vrsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vrsra_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vrsra_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP4:%.*]] = call <1 x i64> @llvm.arm.neon.vrshiftu.v1i64(<1 x i64> [[TMP3]], <1 x i64> <i64 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <1 x i64> [[TMP2]], [[TMP4]]
// CHECK:   ret <1 x i64> [[VRSRA_N]]
uint64x1_t test_vrsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vrsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s8(
// CHECK:   [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshifts.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK:   ret <16 x i8> [[VRSRA_N]]
int8x16_t test_vrsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vrsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshifts.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK:   ret <8 x i16> [[VRSRA_N]]
int16x8_t test_vrsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vrsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshifts.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK:   ret <4 x i32> [[VRSRA_N]]
int32x4_t test_vrsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vrsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshifts.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK:   ret <2 x i64> [[VRSRA_N]]
int64x2_t test_vrsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vrsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u8(
// CHECK:   [[TMP0:%.*]] = call <16 x i8> @llvm.arm.neon.vrshiftu.v16i8(<16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <16 x i8> %a, [[TMP0]]
// CHECK:   ret <16 x i8> [[VRSRA_N]]
uint8x16_t test_vrsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vrsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP4:%.*]] = call <8 x i16> @llvm.arm.neon.vrshiftu.v8i16(<8 x i16> [[TMP3]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <8 x i16> [[TMP2]], [[TMP4]]
// CHECK:   ret <8 x i16> [[VRSRA_N]]
uint16x8_t test_vrsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vrsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP4:%.*]] = call <4 x i32> @llvm.arm.neon.vrshiftu.v4i32(<4 x i32> [[TMP3]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <4 x i32> [[TMP2]], [[TMP4]]
// CHECK:   ret <4 x i32> [[VRSRA_N]]
uint32x4_t test_vrsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vrsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vrsraq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP4:%.*]] = call <2 x i64> @llvm.arm.neon.vrshiftu.v2i64(<2 x i64> [[TMP3]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   [[VRSRA_N:%.*]] = add <2 x i64> [[TMP2]], [[TMP4]]
// CHECK:   ret <2 x i64> [[VRSRA_N]]
uint64x2_t test_vrsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vrsraq_n_u64(a, b, 1);
}

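// vrsubhn: rounding subtract returning the high half of each lane at half
// width, via llvm.arm.neon.vrsubhn. Sketch (assumed, with h = half the
// lane width): r[i] = (half_T)((a[i] - b[i] + (1 << (h-1))) >> h).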
// CHECK-LABEL: @test_vrsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
int8x8_t test_vrsubhn_s16(int16x8_t a, int16x8_t b) {
  return vrsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
int16x4_t test_vrsubhn_s32(int32x4_t a, int32x4_t b) {
  return vrsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
int32x2_t test_vrsubhn_s64(int64x2_t a, int64x2_t b) {
  return vrsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vrsubhn.v8i8(<8 x i16> %a, <8 x i16> %b)
// CHECK:   ret <8 x i8> [[VRSUBHN_V2_I]]
uint8x8_t test_vrsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vrsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vrsubhn.v4i16(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <4 x i16> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <4 x i16> [[VRSUBHN_V2_I]]
uint16x4_t test_vrsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vrsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vrsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VRSUBHN_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vrsubhn.v2i32(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VRSUBHN_V3_I:%.*]] = bitcast <2 x i32> [[VRSUBHN_V2_I]] to <8 x i8>
// CHECK:   ret <2 x i32> [[VRSUBHN_V2_I]]
uint32x2_t test_vrsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vrsubhn_u64(a, b);
}

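// vset_lane/vsetq_lane: replace one lane of a vector with a scalar; this
// maps directly to an IR insertelement. The f16 variants reinterpret half
// lanes as i16 through stack temporaries (the __REINT allocas below)
// before performing the insert.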
// CHECK-LABEL: @test_vset_lane_u8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
uint8x8_t test_vset_lane_u8(uint8_t a, uint8x8_t b) {
  return vset_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_u16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) {
  return vset_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_u32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) {
  return vset_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_s8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
int8x8_t test_vset_lane_s8(int8_t a, int8x8_t b) {
  return vset_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_s16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) {
  return vset_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_s32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1
// CHECK:   ret <2 x i32> [[VSET_LANE]]
int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) {
  return vset_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_p8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i8> %b, i8 %a, i32 7
// CHECK:   ret <8 x i8> [[VSET_LANE]]
poly8x8_t test_vset_lane_p8(poly8_t a, poly8x8_t b) {
  return vset_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vset_lane_p16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3
// CHECK:   ret <4 x i16> [[VSET_LANE]]
poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) {
  return vset_lane_p16(a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_f32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1
// CHECK:   ret <2 x float> [[VSET_LANE]]
float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) {
  return vset_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vset_lane_f16(
// CHECK:   [[__REINT_246:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_246:%.*]] = alloca <4 x half>, align 8
// CHECK:   [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_246]], align 2
// CHECK:   store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>*
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 1
// CHECK:   store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>*
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8
// CHECK:   ret <4 x half> [[TMP8]]
float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) {
  return vset_lane_f16(*a, b, 1);
}

// CHECK-LABEL: @test_vsetq_lane_u8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
uint8x16_t test_vsetq_lane_u8(uint8_t a, uint8x16_t b) {
  return vsetq_lane_u8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_u16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) {
  return vsetq_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_u32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) {
  return vsetq_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_s8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
int8x16_t test_vsetq_lane_s8(int8_t a, int8x16_t b) {
  return vsetq_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_s16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) {
  return vsetq_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_s32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3
// CHECK:   ret <4 x i32> [[VSET_LANE]]
int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) {
  return vsetq_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_p8(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <16 x i8> %b, i8 %a, i32 15
// CHECK:   ret <16 x i8> [[VSET_LANE]]
poly8x16_t test_vsetq_lane_p8(poly8_t a, poly8x16_t b) {
  return vsetq_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vsetq_lane_p16(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7
// CHECK:   ret <8 x i16> [[VSET_LANE]]
poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) {
  return vsetq_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vsetq_lane_f32(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3
// CHECK:   ret <4 x float> [[VSET_LANE]]
float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) {
  return vsetq_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vsetq_lane_f16(
// CHECK:   [[__REINT_248:%.*]] = alloca half, align 2
// CHECK:   [[__REINT1_248:%.*]] = alloca <8 x half>, align 16
// CHECK:   [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16
// CHECK:   [[TMP0:%.*]] = load half, half* %a, align 2
// CHECK:   store half [[TMP0]], half* [[__REINT_248]], align 2
// CHECK:   store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16*
// CHECK:   [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2
// CHECK:   [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>*
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16
// CHECK:   [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 3
// CHECK:   store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>*
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16
// CHECK:   ret <8 x half> [[TMP8]]
float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) {
  return vsetq_lane_f16(*a, b, 3);
}

// CHECK-LABEL: @test_vset_lane_s64(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) {
  return vset_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vset_lane_u64(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0
// CHECK:   ret <1 x i64> [[VSET_LANE]]
uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) {
  return vset_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vsetq_lane_s64(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) {
  return vsetq_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsetq_lane_u64(
// CHECK:   [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1
// CHECK:   ret <2 x i64> [[VSET_LANE]]
uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) {
  return vsetq_lane_u64(a, b, 1);
}

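// vshl/vshlq: lane-wise shift by a signed per-lane count, lowered to
// llvm.arm.neon.vshift{s,u}. Like vrshl, a negative count shifts right,
// but truncating rather than rounding (sketch, not checked by this test).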
14416 // CHECK-LABEL: @test_vshl_s8(
14417 // CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshifts.v8i8(<8 x i8> %a, <8 x i8> %b)
14418 // CHECK:   ret <8 x i8> [[VSHL_V_I]]
test_vshl_s8(int8x8_t a,int8x8_t b)14419 int8x8_t test_vshl_s8(int8x8_t a, int8x8_t b) {
14420   return vshl_s8(a, b);
14421 }
14422 
14423 // CHECK-LABEL: @test_vshl_s16(
14424 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14425 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14426 // CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshifts.v4i16(<4 x i16> %a, <4 x i16> %b)
14427 // CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
14428 // CHECK:   ret <4 x i16> [[VSHL_V2_I]]
test_vshl_s16(int16x4_t a,int16x4_t b)14429 int16x4_t test_vshl_s16(int16x4_t a, int16x4_t b) {
14430   return vshl_s16(a, b);
14431 }
14432 
14433 // CHECK-LABEL: @test_vshl_s32(
14434 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14435 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
14436 // CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshifts.v2i32(<2 x i32> %a, <2 x i32> %b)
14437 // CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
14438 // CHECK:   ret <2 x i32> [[VSHL_V2_I]]
test_vshl_s32(int32x2_t a,int32x2_t b)14439 int32x2_t test_vshl_s32(int32x2_t a, int32x2_t b) {
14440   return vshl_s32(a, b);
14441 }
14442 
14443 // CHECK-LABEL: @test_vshl_s64(
14444 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14445 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14446 // CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshifts.v1i64(<1 x i64> %a, <1 x i64> %b)
14447 // CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
14448 // CHECK:   ret <1 x i64> [[VSHL_V2_I]]
test_vshl_s64(int64x1_t a,int64x1_t b)14449 int64x1_t test_vshl_s64(int64x1_t a, int64x1_t b) {
14450   return vshl_s64(a, b);
14451 }
14452 
14453 // CHECK-LABEL: @test_vshl_u8(
14454 // CHECK:   [[VSHL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftu.v8i8(<8 x i8> %a, <8 x i8> %b)
14455 // CHECK:   ret <8 x i8> [[VSHL_V_I]]
test_vshl_u8(uint8x8_t a,int8x8_t b)14456 uint8x8_t test_vshl_u8(uint8x8_t a, int8x8_t b) {
14457   return vshl_u8(a, b);
14458 }
14459 
14460 // CHECK-LABEL: @test_vshl_u16(
14461 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
14462 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
14463 // CHECK:   [[VSHL_V2_I:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftu.v4i16(<4 x i16> %a, <4 x i16> %b)
14464 // CHECK:   [[VSHL_V3_I:%.*]] = bitcast <4 x i16> [[VSHL_V2_I]] to <8 x i8>
14465 // CHECK:   ret <4 x i16> [[VSHL_V2_I]]
test_vshl_u16(uint16x4_t a,int16x4_t b)14466 uint16x4_t test_vshl_u16(uint16x4_t a, int16x4_t b) {
14467   return vshl_u16(a, b);
14468 }
14469 
14470 // CHECK-LABEL: @test_vshl_u32(
14471 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
14472 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
14473 // CHECK:   [[VSHL_V2_I:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftu.v2i32(<2 x i32> %a, <2 x i32> %b)
14474 // CHECK:   [[VSHL_V3_I:%.*]] = bitcast <2 x i32> [[VSHL_V2_I]] to <8 x i8>
14475 // CHECK:   ret <2 x i32> [[VSHL_V2_I]]
test_vshl_u32(uint32x2_t a,int32x2_t b)14476 uint32x2_t test_vshl_u32(uint32x2_t a, int32x2_t b) {
14477   return vshl_u32(a, b);
14478 }
14479 
14480 // CHECK-LABEL: @test_vshl_u64(
14481 // CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
14482 // CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
14483 // CHECK:   [[VSHL_V2_I:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftu.v1i64(<1 x i64> %a, <1 x i64> %b)
14484 // CHECK:   [[VSHL_V3_I:%.*]] = bitcast <1 x i64> [[VSHL_V2_I]] to <8 x i8>
14485 // CHECK:   ret <1 x i64> [[VSHL_V2_I]]
test_vshl_u64(uint64x1_t a,int64x1_t b)14486 uint64x1_t test_vshl_u64(uint64x1_t a, int64x1_t b) {
14487   return vshl_u64(a, b);
14488 }
14489 
// CHECK-LABEL: @test_vshlq_s8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshifts.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
int8x16_t test_vshlq_s8(int8x16_t a, int8x16_t b) {
  return vshlq_s8(a, b);
}

// CHECK-LABEL: @test_vshlq_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshifts.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
int16x8_t test_vshlq_s16(int16x8_t a, int16x8_t b) {
  return vshlq_s16(a, b);
}

// CHECK-LABEL: @test_vshlq_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
int32x4_t test_vshlq_s32(int32x4_t a, int32x4_t b) {
  return vshlq_s32(a, b);
}

// CHECK-LABEL: @test_vshlq_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshifts.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
int64x2_t test_vshlq_s64(int64x2_t a, int64x2_t b) {
  return vshlq_s64(a, b);
}

// CHECK-LABEL: @test_vshlq_u8(
// CHECK:   [[VSHLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftu.v16i8(<16 x i8> %a, <16 x i8> %b)
// CHECK:   ret <16 x i8> [[VSHLQ_V_I]]
uint8x16_t test_vshlq_u8(uint8x16_t a, int8x16_t b) {
  return vshlq_u8(a, b);
}

// CHECK-LABEL: @test_vshlq_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftu.v8i16(<8 x i16> %a, <8 x i16> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <8 x i16> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <8 x i16> [[VSHLQ_V2_I]]
uint16x8_t test_vshlq_u16(uint16x8_t a, int16x8_t b) {
  return vshlq_u16(a, b);
}

// CHECK-LABEL: @test_vshlq_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftu.v4i32(<4 x i32> %a, <4 x i32> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <4 x i32> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <4 x i32> [[VSHLQ_V2_I]]
uint32x4_t test_vshlq_u32(uint32x4_t a, int32x4_t b) {
  return vshlq_u32(a, b);
}

// CHECK-LABEL: @test_vshlq_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSHLQ_V2_I:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftu.v2i64(<2 x i64> %a, <2 x i64> %b)
// CHECK:   [[VSHLQ_V3_I:%.*]] = bitcast <2 x i64> [[VSHLQ_V2_I]] to <16 x i8>
// CHECK:   ret <2 x i64> [[VSHLQ_V2_I]]
uint64x2_t test_vshlq_u64(uint64x2_t a, int64x2_t b) {
  return vshlq_u64(a, b);
}

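// The widening shift vshll_n_* needs no target intrinsic: the input is
// widened with sext (signed) or zext (unsigned) and then shifted with a
// plain IR shl by the splatted immediate.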
// CHECK-LABEL: @test_vshll_n_s8(
// CHECK:   [[TMP0:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
int16x8_t test_vshll_n_s8(int8x8_t a) {
  return vshll_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
int32x4_t test_vshll_n_s16(int16x4_t a) {
  return vshll_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = sext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
int64x2_t test_vshll_n_s32(int32x2_t a) {
  return vshll_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u8(
// CHECK:   [[TMP0:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VSHLL_N:%.*]] = shl <8 x i16> [[TMP0]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHLL_N]]
uint16x8_t test_vshll_n_u8(uint8x8_t a) {
  return vshll_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
// CHECK:   [[VSHLL_N:%.*]] = shl <4 x i32> [[TMP2]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHLL_N]]
uint32x4_t test_vshll_n_u16(uint16x4_t a) {
  return vshll_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshll_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP2:%.*]] = zext <2 x i32> [[TMP1]] to <2 x i64>
// CHECK:   [[VSHLL_N:%.*]] = shl <2 x i64> [[TMP2]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHLL_N]]
uint64x2_t test_vshll_n_u32(uint32x2_t a) {
  return vshll_n_u32(a, 1);
}

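// The immediate forms vshl_n/vshlq_n likewise lower to a plain vector shl
// by a constant splat; the bitcast round-trip through <8 x i8>/<16 x i8> is
// value-preserving and left over from the generic builtin lowering.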
// CHECK-LABEL: @test_vshl_n_s8(
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <8 x i8> [[VSHL_N]]
int8x8_t test_vshl_n_s8(int8x8_t a) {
  return vshl_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <4 x i16> [[VSHL_N]]
int16x4_t test_vshl_n_s16(int16x4_t a) {
  return vshl_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[VSHL_N]]
int32x2_t test_vshl_n_s32(int32x2_t a) {
  return vshl_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
int64x1_t test_vshl_n_s64(int64x1_t a) {
  return vshl_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u8(
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <8 x i8> [[VSHL_N]]
uint8x8_t test_vshl_n_u8(uint8x8_t a) {
  return vshl_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <4 x i16> [[VSHL_N]]
uint16x4_t test_vshl_n_u16(uint16x4_t a) {
  return vshl_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[VSHL_N]]
uint32x2_t test_vshl_n_u32(uint32x2_t a) {
  return vshl_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshl_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHL_N]]
uint64x1_t test_vshl_n_u64(uint64x1_t a) {
  return vshl_n_u64(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s8(
// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <16 x i8> [[VSHL_N]]
int8x16_t test_vshlq_n_s8(int8x16_t a) {
  return vshlq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHL_N]]
int16x8_t test_vshlq_n_s16(int16x8_t a) {
  return vshlq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHL_N]]
int32x4_t test_vshlq_n_s32(int32x4_t a) {
  return vshlq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHL_N]]
int64x2_t test_vshlq_n_s64(int64x2_t a) {
  return vshlq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u8(
// CHECK:   [[VSHL_N:%.*]] = shl <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <16 x i8> [[VSHL_N]]
uint8x16_t test_vshlq_n_u8(uint8x16_t a) {
  return vshlq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHL_N:%.*]] = shl <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHL_N]]
uint16x8_t test_vshlq_n_u16(uint16x8_t a) {
  return vshlq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHL_N:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHL_N]]
uint32x4_t test_vshlq_n_u32(uint32x4_t a) {
  return vshlq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshlq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHL_N:%.*]] = shl <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHL_N]]
uint64x2_t test_vshlq_n_u64(uint64x2_t a) {
  return vshlq_n_u64(a, 1);
}

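// The narrowing shift vshrn_n_* performs a wide shift right (ashr for
// signed, lshr for unsigned) followed by a trunc to the half-width
// element type.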
// CHECK-LABEL: @test_vshrn_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
int8x8_t test_vshrn_n_s16(int16x8_t a) {
  return vshrn_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
int16x4_t test_vshrn_n_s32(int32x4_t a) {
  return vshrn_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
int32x2_t test_vshrn_n_s64(int64x2_t a) {
  return vshrn_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP2:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <8 x i16> [[TMP2]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSHRN_N]]
uint8x8_t test_vshrn_n_u16(uint16x8_t a) {
  return vshrn_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP2:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <4 x i32> [[TMP2]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSHRN_N]]
uint16x4_t test_vshrn_n_u32(uint32x4_t a) {
  return vshrn_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrn_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP2:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   [[VSHRN_N:%.*]] = trunc <2 x i64> [[TMP2]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSHRN_N]]
uint32x2_t test_vshrn_n_u64(uint64x2_t a) {
  return vshrn_n_u64(a, 1);
}

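// The plain vshr_n/vshrq_n forms select ashr or lshr on signedness, again
// shifting by a splatted constant.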
// CHECK-LABEL: @test_vshr_n_s8(
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <8 x i8> [[VSHR_N]]
int8x8_t test_vshr_n_s8(int8x8_t a) {
  return vshr_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <4 x i16> [[VSHR_N]]
int16x4_t test_vshr_n_s16(int16x4_t a) {
  return vshr_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[VSHR_N]]
int32x2_t test_vshr_n_s32(int32x2_t a) {
  return vshr_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
int64x1_t test_vshr_n_s64(int64x1_t a) {
  return vshr_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u8(
// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <8 x i8> [[VSHR_N]]
uint8x8_t test_vshr_n_u8(uint8x8_t a) {
  return vshr_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <4 x i16> [[VSHR_N]]
uint16x4_t test_vshr_n_u16(uint16x4_t a) {
  return vshr_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i32> [[TMP1]], <i32 1, i32 1>
// CHECK:   ret <2 x i32> [[VSHR_N]]
uint32x2_t test_vshr_n_u32(uint32x2_t a) {
  return vshr_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshr_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSHR_N:%.*]] = lshr <1 x i64> [[TMP1]], <i64 1>
// CHECK:   ret <1 x i64> [[VSHR_N]]
uint64x1_t test_vshr_n_u64(uint64x1_t a) {
  return vshr_n_u64(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s8(
// CHECK:   [[VSHR_N:%.*]] = ashr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <16 x i8> [[VSHR_N]]
int8x16_t test_vshrq_n_s8(int8x16_t a) {
  return vshrq_n_s8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHR_N:%.*]] = ashr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHR_N]]
int16x8_t test_vshrq_n_s16(int16x8_t a) {
  return vshrq_n_s16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHR_N:%.*]] = ashr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHR_N]]
int32x4_t test_vshrq_n_s32(int32x4_t a) {
  return vshrq_n_s32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHR_N:%.*]] = ashr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHR_N]]
int64x2_t test_vshrq_n_s64(int64x2_t a) {
  return vshrq_n_s64(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u8(
// CHECK:   [[VSHR_N:%.*]] = lshr <16 x i8> %a, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   ret <16 x i8> [[VSHR_N]]
uint8x16_t test_vshrq_n_u8(uint8x16_t a) {
  return vshrq_n_u8(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSHR_N:%.*]] = lshr <8 x i16> [[TMP1]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   ret <8 x i16> [[VSHR_N]]
uint16x8_t test_vshrq_n_u16(uint16x8_t a) {
  return vshrq_n_u16(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSHR_N:%.*]] = lshr <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   ret <4 x i32> [[VSHR_N]]
uint32x4_t test_vshrq_n_u32(uint32x4_t a) {
  return vshrq_n_u32(a, 1);
}

// CHECK-LABEL: @test_vshrq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSHR_N:%.*]] = lshr <2 x i64> [[TMP1]], <i64 1, i64 1>
// CHECK:   ret <2 x i64> [[VSHR_N]]
uint64x2_t test_vshrq_n_u64(uint64x2_t a) {
  return vshrq_n_u64(a, 1);
}

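// Shift-and-insert maps onto @llvm.arm.neon.vshiftins. For vsli_n the third
// operand is the shift amount splatted as a positive constant.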
// CHECK-LABEL: @test_vsli_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsli_n_s8(int8x8_t a, int8x8_t b) {
  return vsli_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsli_n_s16(int16x4_t a, int16x4_t b) {
  return vsli_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsli_n_s32(int32x2_t a, int32x2_t b) {
  return vsli_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsli_n_s64(int64x1_t a, int64x1_t b) {
  return vsli_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsli_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsli_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsli_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsli_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 1, i32 1>)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsli_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsli_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 1>)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsli_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsli_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsli_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsli_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsli_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsli_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsli_n_p16(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsliq_n_s8(int8x16_t a, int8x16_t b) {
  return vsliq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsliq_n_s16(int16x8_t a, int16x8_t b) {
  return vsliq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsliq_n_s32(int32x4_t a, int32x4_t b) {
  return vsliq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsliq_n_s64(int64x2_t a, int64x2_t b) {
  return vsliq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsliq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsliq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsliq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsliq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsliq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsliq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 1, i64 1>)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsliq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsliq_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsliq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsliq_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsliq_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsliq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsliq_n_p16(a, b, 1);
}

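// vsra_n/vsraq_n is an accumulating shift: %b is shifted right (ashr or
// lshr) by the immediate and the result is added to the accumulator %a.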
// CHECK-LABEL: @test_vsra_n_s8(
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
int8x8_t test_vsra_n_s8(int8x8_t a, int8x8_t b) {
  return vsra_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i16> [[TMP4]]
int16x4_t test_vsra_n_s16(int16x4_t a, int16x4_t b) {
  return vsra_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i32> [[TMP4]]
int32x2_t test_vsra_n_s32(int32x2_t a, int32x2_t b) {
  return vsra_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
int64x1_t test_vsra_n_s64(int64x1_t a, int64x1_t b) {
  return vsra_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u8(
// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   [[TMP0:%.*]] = add <8 x i8> %a, [[VSRA_N]]
// CHECK:   ret <8 x i8> [[TMP0]]
uint8x8_t test_vsra_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsra_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[TMP4:%.*]] = add <4 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i16> [[TMP4]]
uint16x4_t test_vsra_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsra_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i32> [[TMP3]], <i32 1, i32 1>
// CHECK:   [[TMP4:%.*]] = add <2 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i32> [[TMP4]]
uint32x2_t test_vsra_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsra_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsra_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <1 x i64> [[TMP3]], <i64 1>
// CHECK:   [[TMP4:%.*]] = add <1 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <1 x i64> [[TMP4]]
uint64x1_t test_vsra_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsra_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s8(
// CHECK:   [[VSRA_N:%.*]] = ashr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
int8x16_t test_vsraq_n_s8(int8x16_t a, int8x16_t b) {
  return vsraq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRA_N:%.*]] = ashr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <8 x i16> [[TMP4]]
int16x8_t test_vsraq_n_s16(int16x8_t a, int16x8_t b) {
  return vsraq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRA_N:%.*]] = ashr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i32> [[TMP4]]
int32x4_t test_vsraq_n_s32(int32x4_t a, int32x4_t b) {
  return vsraq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRA_N:%.*]] = ashr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i64> [[TMP4]]
int64x2_t test_vsraq_n_s64(int64x2_t a, int64x2_t b) {
  return vsraq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u8(
// CHECK:   [[VSRA_N:%.*]] = lshr <16 x i8> %b, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
// CHECK:   [[TMP0:%.*]] = add <16 x i8> %a, [[VSRA_N]]
// CHECK:   ret <16 x i8> [[TMP0]]
uint8x16_t test_vsraq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsraq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSRA_N:%.*]] = lshr <8 x i16> [[TMP3]], <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
// CHECK:   [[TMP4:%.*]] = add <8 x i16> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <8 x i16> [[TMP4]]
uint16x8_t test_vsraq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsraq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSRA_N:%.*]] = lshr <4 x i32> [[TMP3]], <i32 1, i32 1, i32 1, i32 1>
// CHECK:   [[TMP4:%.*]] = add <4 x i32> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <4 x i32> [[TMP4]]
uint32x4_t test_vsraq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsraq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsraq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSRA_N:%.*]] = lshr <2 x i64> [[TMP3]], <i64 1, i64 1>
// CHECK:   [[TMP4:%.*]] = add <2 x i64> [[TMP2]], [[VSRA_N]]
// CHECK:   ret <2 x i64> [[TMP4]]
uint64x2_t test_vsraq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsraq_n_u64(a, b, 1);
}

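// vsri_n reuses @llvm.arm.neon.vshiftins, encoding the right insert as a
// negated shift amount, which is why a shift of 1 appears below as an
// all -1 splat.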
// CHECK-LABEL: @test_vsri_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
int8x8_t test_vsri_n_s8(int8x8_t a, int8x8_t b) {
  return vsri_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
int16x4_t test_vsri_n_s16(int16x4_t a, int16x4_t b) {
  return vsri_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
int32x2_t test_vsri_n_s32(int32x2_t a, int32x2_t b) {
  return vsri_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
int64x1_t test_vsri_n_s64(int64x1_t a, int64x1_t b) {
  return vsri_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
uint8x8_t test_vsri_n_u8(uint8x8_t a, uint8x8_t b) {
  return vsri_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
uint16x4_t test_vsri_n_u16(uint16x4_t a, uint16x4_t b) {
  return vsri_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i32> @llvm.arm.neon.vshiftins.v2i32(<2 x i32> [[VSLI_N]], <2 x i32> [[VSLI_N1]], <2 x i32> <i32 -1, i32 -1>)
// CHECK:   ret <2 x i32> [[VSLI_N2]]
uint32x2_t test_vsri_n_u32(uint32x2_t a, uint32x2_t b) {
  return vsri_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <1 x i64> @llvm.arm.neon.vshiftins.v1i64(<1 x i64> [[VSLI_N]], <1 x i64> [[VSLI_N1]], <1 x i64> <i64 -1>)
// CHECK:   ret <1 x i64> [[VSLI_N2]]
uint64x1_t test_vsri_n_u64(uint64x1_t a, uint64x1_t b) {
  return vsri_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <8 x i8> @llvm.arm.neon.vshiftins.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <8 x i8> [[VSLI_N]]
poly8x8_t test_vsri_n_p8(poly8x8_t a, poly8x8_t b) {
  return vsri_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsri_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i16> @llvm.arm.neon.vshiftins.v4i16(<4 x i16> [[VSLI_N]], <4 x i16> [[VSLI_N1]], <4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <4 x i16> [[VSLI_N2]]
poly16x4_t test_vsri_n_p16(poly16x4_t a, poly16x4_t b) {
  return vsri_n_p16(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
int8x16_t test_vsriq_n_s8(int8x16_t a, int8x16_t b) {
  return vsriq_n_s8(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
int16x8_t test_vsriq_n_s16(int16x8_t a, int16x8_t b) {
  return vsriq_n_s16(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
int32x4_t test_vsriq_n_s32(int32x4_t a, int32x4_t b) {
  return vsriq_n_s32(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
int64x2_t test_vsriq_n_s64(int64x2_t a, int64x2_t b) {
  return vsriq_n_s64(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
uint8x16_t test_vsriq_n_u8(uint8x16_t a, uint8x16_t b) {
  return vsriq_n_u8(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
uint16x8_t test_vsriq_n_u16(uint16x8_t a, uint16x8_t b) {
  return vsriq_n_u16(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[VSLI_N2:%.*]] = call <4 x i32> @llvm.arm.neon.vshiftins.v4i32(<4 x i32> [[VSLI_N]], <4 x i32> [[VSLI_N1]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>)
// CHECK:   ret <4 x i32> [[VSLI_N2]]
uint32x4_t test_vsriq_n_u32(uint32x4_t a, uint32x4_t b) {
  return vsriq_n_u32(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[VSLI_N2:%.*]] = call <2 x i64> @llvm.arm.neon.vshiftins.v2i64(<2 x i64> [[VSLI_N]], <2 x i64> [[VSLI_N1]], <2 x i64> <i64 -1, i64 -1>)
// CHECK:   ret <2 x i64> [[VSLI_N2]]
uint64x2_t test_vsriq_n_u64(uint64x2_t a, uint64x2_t b) {
  return vsriq_n_u64(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_p8(
// CHECK:   [[VSLI_N:%.*]] = call <16 x i8> @llvm.arm.neon.vshiftins.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>)
// CHECK:   ret <16 x i8> [[VSLI_N]]
poly8x16_t test_vsriq_n_p8(poly8x16_t a, poly8x16_t b) {
  return vsriq_n_p8(a, b, 1);
}

// CHECK-LABEL: @test_vsriq_n_p16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSLI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
// CHECK:   [[VSLI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[VSLI_N2:%.*]] = call <8 x i16> @llvm.arm.neon.vshiftins.v8i16(<8 x i16> [[VSLI_N]], <8 x i16> [[VSLI_N1]], <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>)
// CHECK:   ret <8 x i16> [[VSLI_N2]]
poly16x8_t test_vsriq_n_p16(poly16x8_t a, poly16x8_t b) {
  return vsriq_n_p16(a, b, 1);
}

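// vst1/vst1q lower to @llvm.arm.neon.vst1 with the destination pointer
// bitcast to i8* and the element alignment passed as the trailing i32
// argument; the 64-bit stores pass an alignment of 4, consistent with the
// apcs-gnu ABI this test is compiled for.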
15520 // CHECK-LABEL: @test_vst1q_u8(
15521 // CHECK:   call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
15522 // CHECK:   ret void
test_vst1q_u8(uint8_t * a,uint8x16_t b)15523 void test_vst1q_u8(uint8_t * a, uint8x16_t b) {
15524   vst1q_u8(a, b);
15525 }

// CHECK-LABEL: @test_vst1q_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1q_u16(uint16_t * a, uint16x8_t b) {
  vst1q_u16(a, b);
}

// CHECK-LABEL: @test_vst1q_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1q_u32(uint32_t * a, uint32x4_t b) {
  vst1q_u32(a, b);
}

// CHECK-LABEL: @test_vst1q_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1q_u64(uint64_t * a, uint64x2_t b) {
  vst1q_u64(a, b);
}

// CHECK-LABEL: @test_vst1q_s8(
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK:   ret void
void test_vst1q_s8(int8_t * a, int8x16_t b) {
  vst1q_s8(a, b);
}

// CHECK-LABEL: @test_vst1q_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1q_s16(int16_t * a, int16x8_t b) {
  vst1q_s16(a, b);
}

// CHECK-LABEL: @test_vst1q_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i32(i8* [[TMP0]], <4 x i32> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1q_s32(int32_t * a, int32x4_t b) {
  vst1q_s32(a, b);
}

// CHECK-LABEL: @test_vst1q_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2i64(i8* [[TMP0]], <2 x i64> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1q_s64(int64_t * a, int64x2_t b) {
  vst1q_s64(a, b);
}

// CHECK-LABEL: @test_vst1q_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8f16(i8* [[TMP0]], <8 x half> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1q_f16(float16_t * a, float16x8_t b) {
  vst1q_f16(a, b);
}

// CHECK-LABEL: @test_vst1q_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4f32(i8* [[TMP0]], <4 x float> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1q_f32(float32_t * a, float32x4_t b) {
  vst1q_f32(a, b);
}

// CHECK-LABEL: @test_vst1q_p8(
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v16i8(i8* %a, <16 x i8> %b, i32 1)
// CHECK:   ret void
void test_vst1q_p8(poly8_t * a, poly8x16_t b) {
  vst1q_p8(a, b);
}

// CHECK-LABEL: @test_vst1q_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i16(i8* [[TMP0]], <8 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1q_p16(poly16_t * a, poly16x8_t b) {
  vst1q_p16(a, b);
}

// CHECK-LABEL: @test_vst1_u8(
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK:   ret void
void test_vst1_u8(uint8_t * a, uint8x8_t b) {
  vst1_u8(a, b);
}

// CHECK-LABEL: @test_vst1_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1_u16(uint16_t * a, uint16x4_t b) {
  vst1_u16(a, b);
}

// CHECK-LABEL: @test_vst1_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_u32(uint32_t * a, uint32x2_t b) {
  vst1_u32(a, b);
}

// CHECK-LABEL: @test_vst1_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_u64(uint64_t * a, uint64x1_t b) {
  vst1_u64(a, b);
}
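
// Note the i32 4 alignment operand on the 64-bit stores above (and on
// test_vst1q_u64): the RUN line selects the apcs-gnu ABI, which aligns
// 64-bit types to 4 bytes rather than 8.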

// CHECK-LABEL: @test_vst1_s8(
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK:   ret void
void test_vst1_s8(int8_t * a, int8x8_t b) {
  vst1_s8(a, b);
}

// CHECK-LABEL: @test_vst1_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1_s16(int16_t * a, int16x4_t b) {
  vst1_s16(a, b);
}

// CHECK-LABEL: @test_vst1_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2i32(i8* [[TMP0]], <2 x i32> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_s32(int32_t * a, int32x2_t b) {
  vst1_s32(a, b);
}

// CHECK-LABEL: @test_vst1_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_s64(int64_t * a, int64x1_t b) {
  vst1_s64(a, b);
}

// CHECK-LABEL: @test_vst1_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4f16(i8* [[TMP0]], <4 x half> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1_f16(float16_t * a, float16x4_t b) {
  vst1_f16(a, b);
}

// CHECK-LABEL: @test_vst1_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v2f32(i8* [[TMP0]], <2 x float> [[TMP2]], i32 4)
// CHECK:   ret void
void test_vst1_f32(float32_t * a, float32x2_t b) {
  vst1_f32(a, b);
}

// CHECK-LABEL: @test_vst1_p8(
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v8i8(i8* %a, <8 x i8> %b, i32 1)
// CHECK:   ret void
void test_vst1_p8(poly8_t * a, poly8x8_t b) {
  vst1_p8(a, b);
}

// CHECK-LABEL: @test_vst1_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v4i16(i8* [[TMP0]], <4 x i16> [[TMP2]], i32 2)
// CHECK:   ret void
void test_vst1_p16(poly16_t * a, poly16x4_t b) {
  vst1_p16(a, b);
}

// CHECK-LABEL: @test_vst1q_lane_u8(
// CHECK:   [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1q_lane_u8(uint8_t * a, uint8x16_t b) {
  vst1q_lane_u8(a, b, 15);
}
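
// The lane forms of vst1 are not lowered to a NEON intrinsic for elements
// of 32 bits or fewer: as checked above, they become a plain extractelement
// plus a scalar store. A hedged C equivalent (the helper is hypothetical
// and unused, so it emits no IR):
static void example_vst1q_lane(uint8_t *dst, uint8x16_t v) {
  vst1q_lane_u8(dst, v, 15); // same effect as: *dst = vgetq_lane_u8(v, 15);
}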

// CHECK-LABEL: @test_vst1q_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1q_lane_u16(uint16_t * a, uint16x8_t b) {
  vst1q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vst1q_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1q_lane_u32(uint32_t * a, uint32x4_t b) {
  vst1q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vst1q_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK:   ret void
void test_vst1q_lane_u64(uint64_t * a, uint64x2_t b) {
  vst1q_lane_u64(a, b, 1);
}
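
// The q-register u64/s64 lane store is the exception: instead of
// extractelement plus a scalar i64 store, the selected lane is shuffled
// out into a <1 x i64> and stored through @llvm.arm.neon.vst1.p0i8.v1i64,
// apparently so the apcs-gnu 4-byte alignment can be carried on the
// intrinsic's alignment operand.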

// CHECK-LABEL: @test_vst1q_lane_s8(
// CHECK:   [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1q_lane_s8(int8_t * a, int8x16_t b) {
  vst1q_lane_s8(a, b, 15);
}

// CHECK-LABEL: @test_vst1q_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1q_lane_s16(int16_t * a, int16x8_t b) {
  vst1q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vst1q_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1q_lane_s32(int32_t * a, int32x4_t b) {
  vst1q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vst1q_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
// CHECK:   [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP2]], <1 x i32> <i32 1>
// CHECK:   call void @llvm.arm.neon.vst1.p0i8.v1i64(i8* [[TMP0]], <1 x i64> [[TMP3]], i32 4)
// CHECK:   ret void
void test_vst1q_lane_s64(int64_t * a, int64x2_t b) {
  vst1q_lane_s64(a, b, 1);
}

// CHECK-LABEL: @test_vst1q_lane_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half>
// CHECK:   [[TMP3:%.*]] = extractelement <8 x half> [[TMP2]], i32 7
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   store half [[TMP3]], half* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1q_lane_f16(float16_t * a, float16x8_t b) {
  vst1q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vst1q_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x float>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   store float [[TMP3]], float* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1q_lane_f32(float32_t * a, float32x4_t b) {
  vst1q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vst1q_lane_p8(
// CHECK:   [[TMP0:%.*]] = extractelement <16 x i8> %b, i32 15
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1q_lane_p8(poly8_t * a, poly8x16_t b) {
  vst1q_lane_p8(a, b, 15);
}

// CHECK-LABEL: @test_vst1q_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <8 x i16> [[TMP2]], i32 7
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1q_lane_p16(poly16_t * a, poly16x8_t b) {
  vst1q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vst1_lane_u8(
// CHECK:   [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1_lane_u8(uint8_t * a, uint8x8_t b) {
  vst1_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vst1_lane_u16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1_lane_u16(uint16_t * a, uint16x4_t b) {
  vst1_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vst1_lane_u32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_u32(uint32_t * a, uint32x2_t b) {
  vst1_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vst1_lane_u64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_u64(uint64_t * a, uint64x1_t b) {
  vst1_lane_u64(a, b, 0);
}

// CHECK-LABEL: @test_vst1_lane_s8(
// CHECK:   [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1_lane_s8(int8_t * a, int8x8_t b) {
  vst1_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vst1_lane_s16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1_lane_s16(int16_t * a, int16x4_t b) {
  vst1_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vst1_lane_s32(
// CHECK:   [[TMP0:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
// CHECK:   [[TMP3:%.*]] = extractelement <2 x i32> [[TMP2]], i32 1
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i32*
// CHECK:   store i32 [[TMP3]], i32* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_s32(int32_t * a, int32x2_t b) {
  vst1_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vst1_lane_s64(
// CHECK:   [[TMP0:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
// CHECK:   [[TMP3:%.*]] = extractelement <1 x i64> [[TMP2]], i32 0
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i64*
// CHECK:   store i64 [[TMP3]], i64* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_s64(int64_t * a, int64x1_t b) {
  vst1_lane_s64(a, b, 0);
}

// CHECK-LABEL: @test_vst1_lane_f16(
// CHECK:   [[TMP0:%.*]] = bitcast half* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x half> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to half*
// CHECK:   store half [[TMP3]], half* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1_lane_f16(float16_t * a, float16x4_t b) {
  vst1_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vst1_lane_f32(
// CHECK:   [[TMP0:%.*]] = bitcast float* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x float>
// CHECK:   [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to float*
// CHECK:   store float [[TMP3]], float* [[TMP4]], align 4
// CHECK:   ret void
void test_vst1_lane_f32(float32_t * a, float32x2_t b) {
  vst1_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vst1_lane_p8(
// CHECK:   [[TMP0:%.*]] = extractelement <8 x i8> %b, i32 7
// CHECK:   store i8 [[TMP0]], i8* %a, align 1
// CHECK:   ret void
void test_vst1_lane_p8(poly8_t * a, poly8x8_t b) {
  vst1_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vst1_lane_p16(
// CHECK:   [[TMP0:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
// CHECK:   [[TMP3:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
// CHECK:   [[TMP4:%.*]] = bitcast i8* [[TMP0]] to i16*
// CHECK:   store i16 [[TMP3]], i16* [[TMP4]], align 2
// CHECK:   ret void
void test_vst1_lane_p16(poly16_t * a, poly16x4_t b) {
  vst1_lane_p16(a, b, 3);
}

// CHECK-LABEL: @test_vst2q_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK:   ret void
void test_vst2q_u8(uint8_t * a, uint8x16x2_t b) {
  vst2q_u8(a, b);
}
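
// The vst2 family takes a structure of two vectors by value. As the CHECK
// lines above show, the ABI coerces the q-register struct argument to
// [4 x i64]; the prologue stores that coercion into an alloca and memcpys
// it into the shadow __s1 local before loading the two fields for the
// intrinsic call. A hedged usage sketch (hypothetical, unused helper):
static void example_vst2q_interleave(uint8_t *dst, uint8x16_t lo, uint8x16_t hi) {
  uint8x16x2_t pair;
  pair.val[0] = lo;
  pair.val[1] = hi;
  vst2q_u8(dst, pair); // interleaves lo/hi bytes into dst[0..31]
}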

// CHECK-LABEL: @test_vst2q_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2q_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_u16(a, b);
}

// CHECK-LABEL: @test_vst2q_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
// CHECK:   ret void
void test_vst2q_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_u32(a, b);
}

// CHECK-LABEL: @test_vst2q_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK:   ret void
void test_vst2q_s8(int8_t * a, int8x16x2_t b) {
  vst2q_s8(a, b);
}

// CHECK-LABEL: @test_vst2q_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2q_s16(int16_t * a, int16x8x2_t b) {
  vst2q_s16(a, b);
}

// CHECK-LABEL: @test_vst2q_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 4)
// CHECK:   ret void
void test_vst2q_s32(int32_t * a, int32x4x2_t b) {
  vst2q_s32(a, b);
}

// CHECK-LABEL: @test_vst2q_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2q_f16(float16_t * a, float16x8x2_t b) {
  vst2q_f16(a, b);
}

// CHECK-LABEL: @test_vst2q_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 4)
// CHECK:   ret void
void test_vst2q_f32(float32_t * a, float32x4x2_t b) {
  vst2q_f32(a, b);
}

// CHECK-LABEL: @test_vst2q_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <16 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <16 x i8>], [2 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], i32 1)
// CHECK:   ret void
void test_vst2q_p8(poly8_t * a, poly8x16x2_t b) {
  vst2q_p8(a, b);
}

// CHECK-LABEL: @test_vst2q_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2q_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_p16(a, b);
}

// CHECK-LABEL: @test_vst2_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
// CHECK:   ret void
void test_vst2_u8(uint8_t * a, uint8x8x2_t b) {
  vst2_u8(a, b);
}
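
// The 64-bit d-register variants below follow the same pattern with
// half-size types: the two-vector struct is coerced to [2 x i64], copied
// with a 16-byte memcpy, and the allocas and element loads use 8-byte
// alignment instead of 16.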
16269 
16270 // CHECK-LABEL: @test_vst2_u16(
16271 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
16272 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
16273 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
16274 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16275 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16276 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
16277 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
16278 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16279 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
16280 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16281 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16282 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16283 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16284 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
16285 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16286 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16287 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16288 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16289 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16290 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16291 // CHECK:   ret void
test_vst2_u16(uint16_t * a,uint16x4x2_t b)16292 void test_vst2_u16(uint16_t * a, uint16x4x2_t b) {
16293   vst2_u16(a, b);
16294 }
16295 
16296 // CHECK-LABEL: @test_vst2_u32(
16297 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
16298 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
16299 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
16300 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16301 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16302 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
16303 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
16304 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16305 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
16306 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16307 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16308 // CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16309 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16310 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
16311 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16312 // CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16313 // CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16314 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16315 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16316 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
16317 // CHECK:   ret void
test_vst2_u32(uint32_t * a,uint32x2x2_t b)16318 void test_vst2_u32(uint32_t * a, uint32x2x2_t b) {
16319   vst2_u32(a, b);
16320 }
16321 
16322 // CHECK-LABEL: @test_vst2_u64(
16323 // CHECK:   [[B:%.*]] = alloca %struct.uint64x1x2_t, align 8
16324 // CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x2_t, align 8
16325 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[B]], i32 0, i32 0
16326 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
16327 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16328 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x2_t* [[__S1]] to i8*
16329 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x2_t* [[B]] to i8*
16330 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16331 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
16332 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
16333 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
16334 // CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16335 // CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
16336 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x2_t, %struct.uint64x1x2_t* [[__S1]], i32 0, i32 0
16337 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
16338 // CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16339 // CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
16340 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
16341 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
16342 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
16343 // CHECK:   ret void
test_vst2_u64(uint64_t * a,uint64x1x2_t b)16344 void test_vst2_u64(uint64_t * a, uint64x1x2_t b) {
16345   vst2_u64(a, b);
16346 }
16347 
16348 // CHECK-LABEL: @test_vst2_s8(
16349 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
16350 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
16351 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
16352 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
16353 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16354 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
16355 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
16356 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16357 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16358 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
16359 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
16360 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
16361 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
16362 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
16363 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
16364 // CHECK:   ret void
test_vst2_s8(int8_t * a,int8x8x2_t b)16365 void test_vst2_s8(int8_t * a, int8x8x2_t b) {
16366   vst2_s8(a, b);
16367 }
16368 
16369 // CHECK-LABEL: @test_vst2_s16(
16370 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
16371 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
16372 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
16373 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
16374 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16375 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
16376 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
16377 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16378 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
16379 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16380 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
16381 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
16382 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
16383 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
16384 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
16385 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
16386 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
16387 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
16388 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
16389 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
16390 // CHECK:   ret void
test_vst2_s16(int16_t * a,int16x4x2_t b)16391 void test_vst2_s16(int16_t * a, int16x4x2_t b) {
16392   vst2_s16(a, b);
16393 }
16394 
16395 // CHECK-LABEL: @test_vst2_s32(
16396 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
16397 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
16398 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
16399 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
16400 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16401 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
16402 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
16403 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16404 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
16405 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16406 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
16407 // CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
16408 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
16409 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
16410 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
16411 // CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
16412 // CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
16413 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
16414 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
16415 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 4)
16416 // CHECK:   ret void
test_vst2_s32(int32_t * a,int32x2x2_t b)16417 void test_vst2_s32(int32_t * a, int32x2x2_t b) {
16418   vst2_s32(a, b);
16419 }
16420 
16421 // CHECK-LABEL: @test_vst2_s64(
16422 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x2_t, align 8
16423 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x2_t, align 8
16424 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[B]], i32 0, i32 0
16425 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <1 x i64>]* [[COERCE_DIVE]] to [2 x i64]*
16426 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
16427 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x2_t* [[__S1]] to i8*
16428 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x2_t* [[B]] to i8*
16429 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
16430 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
16431 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
16432 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL]], i32 0, i32 0
16433 // CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
16434 // CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
16435 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x2_t, %struct.int64x1x2_t* [[__S1]], i32 0, i32 0
16436 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i32 0, i32 1
16437 // CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
16438 // CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
16439 // CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
16440 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
16441 // CHECK:   call void @llvm.arm.neon.vst2.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP8]], <1 x i64> [[TMP9]], i32 4)
16442 // CHECK:   ret void
test_vst2_s64(int64_t * a,int64x1x2_t b)16443 void test_vst2_s64(int64_t * a, int64x1x2_t b) {
16444   vst2_s64(a, b);
16445 }
16446 
// CHECK-LABEL: @test_vst2_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2_f16(float16_t * a, float16x4x2_t b) {
  vst2_f16(a, b);
}

// CHECK-LABEL: @test_vst2_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 4)
// CHECK:   ret void
void test_vst2_f32(float32_t * a, float32x2x2_t b) {
  vst2_f32(a, b);
}

// CHECK-LABEL: @test_vst2_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 1)
// CHECK:   ret void
void test_vst2_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_p8(a, b);
}

// CHECK-LABEL: @test_vst2_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst2.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 2)
// CHECK:   ret void
void test_vst2_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_p16(a, b);
}

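// The vst2q_lane tests below store one constant lane from each of the two
// q registers. A minimal usage sketch, kept as a comment so the
// FileCheck-verified output is unchanged (the function name is illustrative):
//
//   void store_lane7(uint16_t *dst, uint16x8x2_t v) {
//     vst2q_lane_u16(dst, v, 7);  // writes v.val[0][7] then v.val[1][7]
//   }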
// CHECK-LABEL: @test_vst2q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK:   ret void
void test_vst2q_lane_u16(uint16_t * a, uint16x8x2_t b) {
  vst2q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vst2q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
// CHECK:   ret void
void test_vst2q_lane_u32(uint32_t * a, uint32x4x2_t b) {
  vst2q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vst2q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK:   ret void
void test_vst2q_lane_s16(int16_t * a, int16x8x2_t b) {
  vst2q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vst2q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i32>], [2 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP8]], <4 x i32> [[TMP9]], i32 3, i32 4)
// CHECK:   ret void
void test_vst2q_lane_s32(int32_t * a, int32x4x2_t b) {
  vst2q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vst2q_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x2_t, %struct.float16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x half>], [2 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP8]], <8 x half> [[TMP9]], i32 7, i32 2)
// CHECK:   ret void
void test_vst2q_lane_f16(float16_t * a, float16x8x2_t b) {
  vst2q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vst2q_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x float>], [2 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP8]], <4 x float> [[TMP9]], i32 3, i32 4)
// CHECK:   ret void
void test_vst2q_lane_f32(float32_t * a, float32x4x2_t b) {
  vst2q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vst2q_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x2_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i16>], [2 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], i32 7, i32 2)
// CHECK:   ret void
void test_vst2q_lane_p16(poly16_t * a, poly16x8x2_t b) {
  vst2q_lane_p16(a, b, 7);
}

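// The d-register vst2_lane tests below repeat the pattern on 64-bit vectors,
// so the valid lane range shrinks with the element count (0-7 for 8x8 types,
// 0-3 for 16x4, 0-1 for 32x2). Sketch (illustrative name):
//
//   void store_lane0(uint32_t *dst, uint32x2x2_t v) {
//     vst2_lane_u32(dst, v, 0);  // writes v.val[0][0] then v.val[1][0]
//   }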
// CHECK-LABEL: @test_vst2_lane_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK:   ret void
void test_vst2_lane_u8(uint8_t * a, uint8x8x2_t b) {
  vst2_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vst2_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK:   ret void
void test_vst2_lane_u16(uint16_t * a, uint16x4x2_t b) {
  vst2_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vst2_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
// CHECK:   ret void
void test_vst2_lane_u32(uint32_t * a, uint32x2x2_t b) {
  vst2_lane_u32(a, b, 1);
}

// CHECK-LABEL: @test_vst2_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK:   ret void
void test_vst2_lane_s8(int8_t * a, int8x8x2_t b) {
  vst2_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vst2_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK:   ret void
void test_vst2_lane_s16(int16_t * a, int16x4x2_t b) {
  vst2_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vst2_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x i32>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x i32>], [2 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP8]], <2 x i32> [[TMP9]], i32 1, i32 4)
// CHECK:   ret void
void test_vst2_lane_s32(int32_t * a, int32x2x2_t b) {
  vst2_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vst2_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x half>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x2_t, %struct.float16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x half>], [2 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP8]], <4 x half> [[TMP9]], i32 3, i32 2)
// CHECK:   ret void
void test_vst2_lane_f16(float16_t * a, float16x4x2_t b) {
  vst2_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vst2_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <2 x float>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP9]], i32 1, i32 4)
// CHECK:   ret void
void test_vst2_lane_f32(float32_t * a, float32x2x2_t b) {
  vst2_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vst2_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], i32 7, i32 1)
// CHECK:   ret void
void test_vst2_lane_p8(poly8_t * a, poly8x8x2_t b) {
  vst2_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vst2_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <4 x i16>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x2_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x2_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 16, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x <4 x i16>], [2 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[TMP8:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst2lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], i32 3, i32 2)
// CHECK:   ret void
void test_vst2_lane_p16(poly16_t * a, poly16x4x2_t b) {
  vst2_lane_p16(a, b, 3);
}

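// The vst3q tests below exercise the three-register interleaved store: for a
// uint8x16x3_t, 48 bytes are written with elements interleaved across the
// three source vectors. Sketch (illustrative name):
//
//   void store_rgb(uint8_t *dst, uint8x16x3_t rgb) {
//     vst3q_u8(dst, rgb);  // dst = r0,g0,b0,r1,g1,b1,...
//   }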
16973 // CHECK-LABEL: @test_vst3q_u8(
16974 // CHECK:   [[B:%.*]] = alloca %struct.uint8x16x3_t, align 16
16975 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x3_t, align 16
16976 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[B]], i32 0, i32 0
16977 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
16978 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
16979 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x3_t* [[__S1]] to i8*
16980 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x3_t* [[B]] to i8*
16981 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
16982 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16983 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
16984 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
16985 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16986 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
16987 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
16988 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x3_t, %struct.uint8x16x3_t* [[__S1]], i32 0, i32 0
16989 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
16990 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
16991 // CHECK:   call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
16992 // CHECK:   ret void
test_vst3q_u8(uint8_t * a,uint8x16x3_t b)16993 void test_vst3q_u8(uint8_t * a, uint8x16x3_t b) {
16994   vst3q_u8(a, b);
16995 }
16996 
16997 // CHECK-LABEL: @test_vst3q_u16(
16998 // CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
16999 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
17000 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
17001 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17002 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17003 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
17004 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
17005 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17006 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
17007 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17008 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17009 // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17010 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17011 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17012 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17013 // CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17014 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17015 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
17016 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17017 // CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17018 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17019 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17020 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17021 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17022 // CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
17023 // CHECK:   ret void
test_vst3q_u16(uint16_t * a,uint16x8x3_t b)17024 void test_vst3q_u16(uint16_t * a, uint16x8x3_t b) {
17025   vst3q_u16(a, b);
17026 }
17027 
17028 // CHECK-LABEL: @test_vst3q_u32(
17029 // CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
17030 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
17031 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
17032 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
17033 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17034 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
17035 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
17036 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17037 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
17038 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17039 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
17040 // CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17041 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
17042 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17043 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
17044 // CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17045 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
17046 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
17047 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
17048 // CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17049 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17050 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17051 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17052 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17053 // CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
17054 // CHECK:   ret void
test_vst3q_u32(uint32_t * a,uint32x4x3_t b)17055 void test_vst3q_u32(uint32_t * a, uint32x4x3_t b) {
17056   vst3q_u32(a, b);
17057 }
17058 
17059 // CHECK-LABEL: @test_vst3q_s8(
17060 // CHECK:   [[B:%.*]] = alloca %struct.int8x16x3_t, align 16
17061 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x3_t, align 16
17062 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[B]], i32 0, i32 0
17063 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
17064 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17065 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x3_t* [[__S1]] to i8*
17066 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x3_t* [[B]] to i8*
17067 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17068 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
17069 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
17070 // CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
17071 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
17072 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
17073 // CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
17074 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x3_t, %struct.int8x16x3_t* [[__S1]], i32 0, i32 0
17075 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
17076 // CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
17077 // CHECK:   call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
17078 // CHECK:   ret void
test_vst3q_s8(int8_t * a,int8x16x3_t b)17079 void test_vst3q_s8(int8_t * a, int8x16x3_t b) {
17080   vst3q_s8(a, b);
17081 }
17082 
17083 // CHECK-LABEL: @test_vst3q_s16(
17084 // CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
17085 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
17086 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
17087 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
17088 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17089 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
17090 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
17091 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17092 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
17093 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17094 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
17095 // CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
17096 // CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
17097 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17098 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
17099 // CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
17100 // CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
17101 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
17102 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
17103 // CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
17104 // CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
17105 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
17106 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
17107 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
17108 // CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
17109 // CHECK:   ret void
test_vst3q_s16(int16_t * a,int16x8x3_t b)17110 void test_vst3q_s16(int16_t * a, int16x8x3_t b) {
17111   vst3q_s16(a, b);
17112 }
17113 
17114 // CHECK-LABEL: @test_vst3q_s32(
17115 // CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
17116 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
17117 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
17118 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
17119 // CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
17120 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
17121 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
17122 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
17123 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
17124 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17125 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
17126 // CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
17127 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
17128 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17129 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
17130 // CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
17131 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
17132 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
17133 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
17134 // CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
17135 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
17136 // CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
17137 // CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
17138 // CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
17139 // CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 4)
17140 // CHECK:   ret void
test_vst3q_s32(int32_t * a,int32x4x3_t b)17141 void test_vst3q_s32(int32_t * a, int32x4x3_t b) {
17142   vst3q_s32(a, b);
17143 }

// CHECK-LABEL: @test_vst3q_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3q_f16(float16_t * a, float16x8x3_t b) {
  vst3q_f16(a, b);
}

// CHECK-LABEL: @test_vst3q_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3q_f32(float32_t * a, float32x4x3_t b) {
  vst3q_f32(a, b);
}

// CHECK-LABEL: @test_vst3q_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <16 x i8>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x3_t, %struct.poly8x16x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <16 x i8>], [3 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3q_p8(poly8_t * a, poly8x16x3_t b) {
  vst3q_p8(a, b);
}

// CHECK-LABEL: @test_vst3q_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3q_p16(poly16_t * a, poly16x8x3_t b) {
  vst3q_p16(a, b);
}

// CHECK-LABEL: @test_vst3_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3_u8(uint8_t * a, uint8x8x3_t b) {
  vst3_u8(a, b);
}

// CHECK-LABEL: @test_vst3_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3_u16(uint16_t * a, uint16x4x3_t b) {
  vst3_u16(a, b);
}

// CHECK-LABEL: @test_vst3_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_u32(a, b);
}

// CHECK-LABEL: @test_vst3_u64(
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x3_t, %struct.uint64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_u64(uint64_t * a, uint64x1x3_t b) {
  vst3_u64(a, b);
}

// CHECK-LABEL: @test_vst3_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3_s8(int8_t * a, int8x8x3_t b) {
  vst3_s8(a, b);
}

// CHECK-LABEL: @test_vst3_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3_s16(int16_t * a, int16x4x3_t b) {
  vst3_s16(a, b);
}

// CHECK-LABEL: @test_vst3_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_s32(int32_t * a, int32x2x3_t b) {
  vst3_s32(a, b);
}

// CHECK-LABEL: @test_vst3_s64(
// CHECK:   [[B:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <1 x i64>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x3_t, %struct.int64x1x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], <1 x i64> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_s64(int64_t * a, int64x1x3_t b) {
  vst3_s64(a, b);
}

// CHECK-LABEL: @test_vst3_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3_f16(float16_t * a, float16x4x3_t b) {
  vst3_f16(a, b);
}

// CHECK-LABEL: @test_vst3_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 4)
// CHECK:   ret void
void test_vst3_f32(float32_t * a, float32x2x3_t b) {
  vst3_f32(a, b);
}

// CHECK-LABEL: @test_vst3_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 1)
// CHECK:   ret void
void test_vst3_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_p8(a, b);
}

// CHECK-LABEL: @test_vst3_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 2)
// CHECK:   ret void
void test_vst3_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_p16(a, b);
}

// CHECK-LABEL: @test_vst3q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x3_t, %struct.uint16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK:   ret void
void test_vst3q_lane_u16(uint16_t * a, uint16x8x3_t b) {
  vst3q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vst3q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x3_t, %struct.uint32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK:   ret void
void test_vst3q_lane_u32(uint32_t * a, uint32x4x3_t b) {
  vst3q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vst3q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x3_t, %struct.int16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK:   ret void
void test_vst3q_lane_s16(int16_t * a, int16x8x3_t b) {
  vst3q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vst3q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i32>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x3_t, %struct.int32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i32>], [3 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], i32 3, i32 4)
// CHECK:   ret void
void test_vst3q_lane_s32(int32_t * a, int32x4x3_t b) {
  vst3q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vst3q_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x half>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x3_t, %struct.float16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x half>], [3 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP10]], <8 x half> [[TMP11]], <8 x half> [[TMP12]], i32 7, i32 2)
// CHECK:   ret void
void test_vst3q_lane_f16(float16_t * a, float16x8x3_t b) {
  vst3q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vst3q_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x float>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x3_t, %struct.float32x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x float>], [3 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP10]], <4 x float> [[TMP11]], <4 x float> [[TMP12]], i32 3, i32 4)
// CHECK:   ret void
void test_vst3q_lane_f32(float32_t * a, float32x4x3_t b) {
  vst3q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vst3q_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x3_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i16>]* [[COERCE_DIVE]] to [6 x i64]*
// CHECK:   store [6 x i64] [[B]].coerce, [6 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 48, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x3_t, %struct.poly16x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i16>], [3 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP10]], <8 x i16> [[TMP11]], <8 x i16> [[TMP12]], i32 7, i32 2)
// CHECK:   ret void
void test_vst3q_lane_p16(poly16_t * a, poly16x8x3_t b) {
  vst3q_lane_p16(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK:   ret void
void test_vst3_lane_u8(uint8_t * a, uint8x8x3_t b) {
  vst3_lane_u8(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x3_t, %struct.uint16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK:   ret void
void test_vst3_lane_u16(uint16_t * a, uint16x4x3_t b) {
  vst3_lane_u16(a, b, 3);
}

// CHECK-LABEL: @test_vst3_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x3_t, %struct.uint32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
// CHECK:   ret void
void test_vst3_lane_u32(uint32_t * a, uint32x2x3_t b) {
  vst3_lane_u32(a, b, 1);
}
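// For elements wider than i8 the destination pointer is first cast to i8*
// and every vector takes a bitcast round trip through <8 x i8>. Those
// redundant casts come from the generic NEON builtin marshalling Clang emits
// at -O0; they survive in the checked IR because the RUN line only applies
// mem2reg, not a full optimization pipeline.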

// CHECK-LABEL: @test_vst3_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK:   ret void
void test_vst3_lane_s8(int8_t * a, int8x8x3_t b) {
  vst3_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x3_t, %struct.int16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK:   ret void
void test_vst3_lane_s16(int16_t * a, int16x4x3_t b) {
  vst3_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vst3_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x i32>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x3_t, %struct.int32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x i32>], [3 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP10]], <2 x i32> [[TMP11]], <2 x i32> [[TMP12]], i32 1, i32 4)
// CHECK:   ret void
void test_vst3_lane_s32(int32_t * a, int32x2x3_t b) {
  vst3_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vst3_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x half>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x3_t, %struct.float16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x half>], [3 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP10]], <4 x half> [[TMP11]], <4 x half> [[TMP12]], i32 3, i32 2)
// CHECK:   ret void
void test_vst3_lane_f16(float16_t * a, float16x4x3_t b) {
  vst3_lane_f16(a, b, 3);
}
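// The float16 variant lowers through the same pattern, just with a v4f16
// intrinsic payload. Presumably the +fullfp16 feature and
// -fallow-half-arguments-and-returns options in the RUN line are what let
// float16x4x3_t values be formed and passed by value here.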

// CHECK-LABEL: @test_vst3_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <2 x float>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x3_t, %struct.float32x2x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <2 x float>], [3 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP10]], <2 x float> [[TMP11]], <2 x float> [[TMP12]], i32 1, i32 4)
// CHECK:   ret void
void test_vst3_lane_f32(float32_t * a, float32x2x3_t b) {
  vst3_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vst3_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], i32 7, i32 1)
// CHECK:   ret void
void test_vst3_lane_p8(poly8_t * a, poly8x8x3_t b) {
  vst3_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vst3_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <4 x i16>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x3_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x3_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 24, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x3_t, %struct.poly16x4x3_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [3 x <4 x i16>], [3 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[TMP10:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst3lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP10]], <4 x i16> [[TMP11]], <4 x i16> [[TMP12]], i32 3, i32 2)
// CHECK:   ret void
void test_vst3_lane_p16(poly16_t * a, poly16x4x3_t b) {
  vst3_lane_p16(a, b, 3);
}
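// A minimal usage sketch for vst3_lane (illustrative only; the names r, g, b
// and out are hypothetical and nothing below is matched by FileCheck):
// interleave one pixel from three separate 8-pixel channel vectors.
//
//   uint8x8x3_t planes = { { r, g, b } };
//   vst3_lane_u8(out, planes, 0);  // writes r[0], g[0], b[0] to out[0..2]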

// CHECK-LABEL: @test_vst4q_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x16x4_t, %struct.uint8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4q_u8(uint8_t * a, uint8x16x4_t b) {
  vst4q_u8(a, b);
}
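// The vst4q tests repeat the same pattern at q-register width: four 128-bit
// vectors coerced to [8 x i64], locals aligned to 16 bytes, and a 64-byte
// struct copy instead of the 24-byte copy used by the d-register vst3 cases.
// With no lane operand, the single trailing i32 is just the alignment.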

// CHECK-LABEL: @test_vst4q_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4q_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_u16(a, b);
}

// CHECK-LABEL: @test_vst4q_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4q_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_u32(a, b);
}

// CHECK-LABEL: @test_vst4q_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x16x4_t, %struct.int8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4q_s8(int8_t * a, int8x16x4_t b) {
  vst4q_s8(a, b);
}

// CHECK-LABEL: @test_vst4q_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4q_s16(int16_t * a, int16x8x4_t b) {
  vst4q_s16(a, b);
}

// CHECK-LABEL: @test_vst4q_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4q_s32(int32_t * a, int32x4x4_t b) {
  vst4q_s32(a, b);
}

// CHECK-LABEL: @test_vst4q_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4q_f16(float16_t * a, float16x8x4_t b) {
  vst4q_f16(a, b);
}

// CHECK-LABEL: @test_vst4q_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4q_f32(float32_t * a, float32x4x4_t b) {
  vst4q_f32(a, b);
}

// CHECK-LABEL: @test_vst4q_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x16x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <16 x i8>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x16x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x16x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX]], align 16
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX2]], align 16
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX4]], align 16
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x16x4_t, %struct.poly8x16x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <16 x i8>], [4 x <16 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <16 x i8>, <16 x i8>* [[ARRAYIDX6]], align 16
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v16i8(i8* %a, <16 x i8> [[TMP3]], <16 x i8> [[TMP4]], <16 x i8> [[TMP5]], <16 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4q_p8(poly8_t * a, poly8x16x4_t b) {
  vst4q_p8(a, b);
}

// CHECK-LABEL: @test_vst4q_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4q_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_p16(a, b);
}
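// From here on the tests cover the 64-bit d-register vst4 variants: the
// aggregates shrink to [4 x i64] coercions with 8-byte alignment and a
// 32-byte memcpy, and the intrinsic payloads are <8 x i8>, <4 x i16>,
// <2 x i32> and <1 x i64>.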

// CHECK-LABEL: @test_vst4_u8(
// CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
// CHECK:   ret void
void test_vst4_u8(uint8_t * a, uint8x8x4_t b) {
  vst4_u8(a, b);
}
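// Illustrative use of the whole-vector vst4 (hypothetical names r, g, b,
// alpha and dst; not part of the checked IR): store eight RGBA pixels from
// four per-channel vectors so the bytes land interleaved in memory.
//
//   uint8x8x4_t rgba = { { r, g, b, alpha } };
//   vst4_u8(dst, rgba);  // dst[0..3] = r[0], g[0], b[0], alpha[0], ...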

// CHECK-LABEL: @test_vst4_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
// CHECK:   ret void
void test_vst4_u16(uint16_t * a, uint16x4x4_t b) {
  vst4_u16(a, b);
}

// CHECK-LABEL: @test_vst4_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4_u32(uint32_t * a, uint32x2x4_t b) {
  vst4_u32(a, b);
}

// CHECK-LABEL: @test_vst4_u64(
// CHECK:   [[B:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.uint64x1x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint64x1x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint64x1x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint64x1x4_t, %struct.uint64x1x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
// CHECK:   call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
// CHECK:   ret void
void test_vst4_u64(uint64_t * a, uint64x1x4_t b) {
  vst4_u64(a, b);
}
18586 
18587 // CHECK-LABEL: @test_vst4_s8(
18588 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
18589 // CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
18590 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
18591 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
18592 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18593 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
18594 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
18595 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18596 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
18597 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
18598 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
18599 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
18600 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
18601 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
18602 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
18603 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
18604 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
18605 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
18606 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
18607 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
18608 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
18609 // CHECK:   ret void
test_vst4_s8(int8_t * a,int8x8x4_t b)18610 void test_vst4_s8(int8_t * a, int8x8x4_t b) {
18611   vst4_s8(a, b);
18612 }
18613 
18614 // CHECK-LABEL: @test_vst4_s16(
18615 // CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
18616 // CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
18617 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
18618 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
18619 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18620 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
18621 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
18622 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18623 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
18624 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
18625 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
18626 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
18627 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18628 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
18629 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
18630 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
18631 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18632 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
18633 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
18634 // CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
18635 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18636 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
18637 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
18638 // CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
18639 // CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
18640 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18641 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18642 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18643 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
18644 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
18645 // CHECK:   ret void
test_vst4_s16(int16_t * a,int16x4x4_t b)18646 void test_vst4_s16(int16_t * a, int16x4x4_t b) {
18647   vst4_s16(a, b);
18648 }
18649 
18650 // CHECK-LABEL: @test_vst4_s32(
18651 // CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
18652 // CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
18653 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
18654 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
18655 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18656 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
18657 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
18658 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18659 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
18660 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
18661 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
18662 // CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
18663 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
18664 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
18665 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
18666 // CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
18667 // CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
18668 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
18669 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
18670 // CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
18671 // CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
18672 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
18673 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
18674 // CHECK:   [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
18675 // CHECK:   [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
18676 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
18677 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
18678 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
18679 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
18680 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 4)
18681 // CHECK:   ret void
test_vst4_s32(int32_t * a,int32x2x4_t b)18682 void test_vst4_s32(int32_t * a, int32x2x4_t b) {
18683   vst4_s32(a, b);
18684 }
18685 
18686 // CHECK-LABEL: @test_vst4_s64(
18687 // CHECK:   [[B:%.*]] = alloca %struct.int64x1x4_t, align 8
18688 // CHECK:   [[__S1:%.*]] = alloca %struct.int64x1x4_t, align 8
18689 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[B]], i32 0, i32 0
18690 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <1 x i64>]* [[COERCE_DIVE]] to [4 x i64]*
18691 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18692 // CHECK:   [[TMP1:%.*]] = bitcast %struct.int64x1x4_t* [[__S1]] to i8*
18693 // CHECK:   [[TMP2:%.*]] = bitcast %struct.int64x1x4_t* [[B]] to i8*
18694 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18695 // CHECK:   [[TMP3:%.*]] = bitcast i64* %a to i8*
18696 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
18697 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL]], i32 0, i32 0
18698 // CHECK:   [[TMP4:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
18699 // CHECK:   [[TMP5:%.*]] = bitcast <1 x i64> [[TMP4]] to <8 x i8>
18700 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
18701 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i32 0, i32 1
18702 // CHECK:   [[TMP6:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX2]], align 8
18703 // CHECK:   [[TMP7:%.*]] = bitcast <1 x i64> [[TMP6]] to <8 x i8>
18704 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
18705 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL3]], i32 0, i32 2
18706 // CHECK:   [[TMP8:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX4]], align 8
18707 // CHECK:   [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
18708 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int64x1x4_t, %struct.int64x1x4_t* [[__S1]], i32 0, i32 0
18709 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL5]], i32 0, i32 3
18710 // CHECK:   [[TMP10:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX6]], align 8
18711 // CHECK:   [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <8 x i8>
18712 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
18713 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <1 x i64>
18714 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <1 x i64>
18715 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <1 x i64>
18716 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v1i64(i8* [[TMP3]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], <1 x i64> [[TMP15]], i32 4)
18717 // CHECK:   ret void
test_vst4_s64(int64_t * a,int64x1x4_t b)18718 void test_vst4_s64(int64_t * a, int64x1x4_t b) {
18719   vst4_s64(a, b);
18720 }
18721 
18722 // CHECK-LABEL: @test_vst4_f16(
18723 // CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
18724 // CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
18725 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
18726 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
18727 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18728 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
18729 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
18730 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18731 // CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
18732 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
18733 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
18734 // CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
18735 // CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
18736 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
18737 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
18738 // CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
18739 // CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
18740 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
18741 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
18742 // CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
18743 // CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
18744 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
18745 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
18746 // CHECK:   [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
18747 // CHECK:   [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
18748 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
18749 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
18750 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
18751 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
18752 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 2)
18753 // CHECK:   ret void
test_vst4_f16(float16_t * a,float16x4x4_t b)18754 void test_vst4_f16(float16_t * a, float16x4x4_t b) {
18755   vst4_f16(a, b);
18756 }
18757 
18758 // CHECK-LABEL: @test_vst4_f32(
18759 // CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
18760 // CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
18761 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
18762 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
18763 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18764 // CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
18765 // CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
18766 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18767 // CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
18768 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
18769 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
18770 // CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
18771 // CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
18772 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
18773 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
18774 // CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
18775 // CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
18776 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
18777 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
18778 // CHECK:   [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
18779 // CHECK:   [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
18780 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
18781 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
18782 // CHECK:   [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
18783 // CHECK:   [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
18784 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
18785 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
18786 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
18787 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
18788 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 4)
18789 // CHECK:   ret void
test_vst4_f32(float32_t * a,float32x2x4_t b)18790 void test_vst4_f32(float32_t * a, float32x2x4_t b) {
18791   vst4_f32(a, b);
18792 }
18793 
18794 // CHECK-LABEL: @test_vst4_p8(
18795 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
18796 // CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
18797 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
18798 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
18799 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18800 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
18801 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
18802 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18803 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
18804 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
18805 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
18806 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
18807 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
18808 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
18809 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
18810 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
18811 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
18812 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
18813 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
18814 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
18815 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 1)
18816 // CHECK:   ret void
test_vst4_p8(poly8_t * a,poly8x8x4_t b)18817 void test_vst4_p8(poly8_t * a, poly8x8x4_t b) {
18818   vst4_p8(a, b);
18819 }
18820 
18821 // CHECK-LABEL: @test_vst4_p16(
18822 // CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
18823 // CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
18824 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
18825 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
18826 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
18827 // CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
18828 // CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
18829 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
18830 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
18831 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
18832 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
18833 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
18834 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
18835 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
18836 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
18837 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
18838 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
18839 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
18840 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
18841 // CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
18842 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
18843 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
18844 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
18845 // CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
18846 // CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
18847 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
18848 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
18849 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
18850 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
18851 // CHECK:   call void @llvm.arm.neon.vst4.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 2)
18852 // CHECK:   ret void
test_vst4_p16(poly16_t * a,poly16x4x4_t b)18853 void test_vst4_p16(poly16_t * a, poly16x4x4_t b) {
18854   vst4_p16(a, b);
18855 }
18856 
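// The vst4q_lane tests store a single lane from each of four q registers.
// Relative to the plain vst4 pattern above, the q forms coerce the struct as
// [8 x i64] at 16-byte alignment, bitcast through <16 x i8>, and call
// @llvm.arm.neon.vst4lane, whose two trailing i32 operands are the lane index
// and the alignment in bytes (e.g. lane 7, align 2 in the i16 case below).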
// CHECK-LABEL: @test_vst4q_lane_u16(
// CHECK:   [[B:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x8x4_t, %struct.uint16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK:   ret void
void test_vst4q_lane_u16(uint16_t * a, uint16x8x4_t b) {
  vst4q_lane_u16(a, b, 7);
}

// CHECK-LABEL: @test_vst4q_lane_u32(
// CHECK:   [[B:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.uint32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x4x4_t, %struct.uint32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK:   ret void
void test_vst4q_lane_u32(uint32_t * a, uint32x4x4_t b) {
  vst4q_lane_u32(a, b, 3);
}

// CHECK-LABEL: @test_vst4q_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x8x4_t, %struct.int16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK:   ret void
void test_vst4q_lane_s16(int16_t * a, int16x8x4_t b) {
  vst4q_lane_s16(a, b, 7);
}

// CHECK-LABEL: @test_vst4q_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i32>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i32> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i32> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x4x4_t, %struct.int32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i32>], [4 x <4 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i32>, <4 x i32>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x i32>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i32(i8* [[TMP3]], <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], i32 3, i32 4)
// CHECK:   ret void
void test_vst4q_lane_s32(int32_t * a, int32x4x4_t b) {
  vst4q_lane_s32(a, b, 3);
}

// CHECK-LABEL: @test_vst4q_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x half>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x half> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x8x4_t, %struct.float16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x half>], [4 x <8 x half>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x half>, <8 x half>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x half> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x half>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x half>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x half>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8f16(i8* [[TMP3]], <8 x half> [[TMP12]], <8 x half> [[TMP13]], <8 x half> [[TMP14]], <8 x half> [[TMP15]], i32 7, i32 2)
// CHECK:   ret void
void test_vst4q_lane_f16(float16_t * a, float16x8x4_t b) {
  vst4q_lane_f16(a, b, 7);
}

// CHECK-LABEL: @test_vst4q_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x4x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x float>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <4 x float> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <4 x float> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <4 x float> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x4x4_t, %struct.float32x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x float>], [4 x <4 x float>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x float>, <4 x float>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <4 x float> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <4 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <4 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <4 x float>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <4 x float>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4f32(i8* [[TMP3]], <4 x float> [[TMP12]], <4 x float> [[TMP13]], <4 x float> [[TMP14]], <4 x float> [[TMP15]], i32 3, i32 4)
// CHECK:   ret void
void test_vst4q_lane_f32(float32_t * a, float32x4x4_t b) {
  vst4q_lane_f32(a, b, 3);
}

// CHECK-LABEL: @test_vst4q_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x8x4_t, align 16
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i16>]* [[COERCE_DIVE]] to [8 x i64]*
// CHECK:   store [8 x i64] [[B]].coerce, [8 x i64]* [[TMP0]], align 16
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 16 [[TMP1]], i8* align 16 [[TMP2]], i32 64, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX]], align 16
// CHECK:   [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX2]], align 16
// CHECK:   [[TMP7:%.*]] = bitcast <8 x i16> [[TMP6]] to <16 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX4]], align 16
// CHECK:   [[TMP9:%.*]] = bitcast <8 x i16> [[TMP8]] to <16 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x8x4_t, %struct.poly16x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i16>], [4 x <8 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <8 x i16>, <8 x i16>* [[ARRAYIDX6]], align 16
// CHECK:   [[TMP11:%.*]] = bitcast <8 x i16> [[TMP10]] to <16 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <16 x i8> [[TMP9]] to <8 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <16 x i8> [[TMP11]] to <8 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i16(i8* [[TMP3]], <8 x i16> [[TMP12]], <8 x i16> [[TMP13]], <8 x i16> [[TMP14]], <8 x i16> [[TMP15]], i32 7, i32 2)
// CHECK:   ret void
void test_vst4q_lane_p16(poly16_t * a, poly16x8x4_t b) {
  vst4q_lane_p16(a, b, 7);
}

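// The remaining vst4_lane tests exercise the 64-bit d-register forms: the same
// vst4lane lowering as above, but with [4 x i64] coercion at 8-byte alignment
// and lane indices bounded by the narrower vectors (lane 7 for <8 x i8>,
// lane 3 for <4 x i16>, lane 1 for <2 x i32>).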
19109 // CHECK-LABEL: @test_vst4_lane_u8(
19110 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
19111 // CHECK:   [[__S1:%.*]] = alloca %struct.uint8x8x4_t, align 8
19112 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
19113 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
19114 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19115 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint8x8x4_t* [[__S1]] to i8*
19116 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint8x8x4_t* [[B]] to i8*
19117 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19118 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19119 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
19120 // CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
19121 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19122 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
19123 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
19124 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19125 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
19126 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
19127 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__S1]], i32 0, i32 0
19128 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
19129 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
19130 // CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
19131 // CHECK:   ret void
test_vst4_lane_u8(uint8_t * a,uint8x8x4_t b)19132 void test_vst4_lane_u8(uint8_t * a, uint8x8x4_t b) {
19133   vst4_lane_u8(a, b, 7);
19134 }
19135 
19136 // CHECK-LABEL: @test_vst4_lane_u16(
19137 // CHECK:   [[B:%.*]] = alloca %struct.uint16x4x4_t, align 8
19138 // CHECK:   [[__S1:%.*]] = alloca %struct.uint16x4x4_t, align 8
19139 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[B]], i32 0, i32 0
19140 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
19141 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19142 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint16x4x4_t* [[__S1]] to i8*
19143 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint16x4x4_t* [[B]] to i8*
19144 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19145 // CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
19146 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19147 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
19148 // CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
19149 // CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
19150 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19151 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
19152 // CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
19153 // CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
19154 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19155 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
19156 // CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
19157 // CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
19158 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint16x4x4_t, %struct.uint16x4x4_t* [[__S1]], i32 0, i32 0
19159 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
19160 // CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
19161 // CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
19162 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
19163 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
19164 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
19165 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
19166 // CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
19167 // CHECK:   ret void
test_vst4_lane_u16(uint16_t * a,uint16x4x4_t b)19168 void test_vst4_lane_u16(uint16_t * a, uint16x4x4_t b) {
19169   vst4_lane_u16(a, b, 3);
19170 }
19171 
19172 // CHECK-LABEL: @test_vst4_lane_u32(
19173 // CHECK:   [[B:%.*]] = alloca %struct.uint32x2x4_t, align 8
19174 // CHECK:   [[__S1:%.*]] = alloca %struct.uint32x2x4_t, align 8
19175 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[B]], i32 0, i32 0
19176 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
19177 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
19178 // CHECK:   [[TMP1:%.*]] = bitcast %struct.uint32x2x4_t* [[__S1]] to i8*
19179 // CHECK:   [[TMP2:%.*]] = bitcast %struct.uint32x2x4_t* [[B]] to i8*
19180 // CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
19181 // CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
19182 // CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19183 // CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
19184 // CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
19185 // CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
19186 // CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19187 // CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
19188 // CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
19189 // CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
19190 // CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19191 // CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
19192 // CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
19193 // CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
19194 // CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.uint32x2x4_t, %struct.uint32x2x4_t* [[__S1]], i32 0, i32 0
19195 // CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
19196 // CHECK:   [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
19197 // CHECK:   [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
19198 // CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
19199 // CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
19200 // CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
19201 // CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
19202 // CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
19203 // CHECK:   ret void
test_vst4_lane_u32(uint32_t * a,uint32x2x4_t b)19204 void test_vst4_lane_u32(uint32_t * a, uint32x2x4_t b) {
19205   vst4_lane_u32(a, b, 1);
19206 }
19207 
// CHECK-LABEL: @test_vst4_lane_s8(
// CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK:   ret void
void test_vst4_lane_s8(int8_t * a, int8x8x4_t b) {
  vst4_lane_s8(a, b, 7);
}

// CHECK-LABEL: @test_vst4_lane_s16(
// CHECK:   [[B:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int16x4x4_t, %struct.int16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK:   ret void
void test_vst4_lane_s16(int16_t * a, int16x4x4_t b) {
  vst4_lane_s16(a, b, 3);
}

// CHECK-LABEL: @test_vst4_lane_s32(
// CHECK:   [[B:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.int32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x i32>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.int32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.int32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i32* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x i32> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x i32> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x i32> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.int32x2x4_t, %struct.int32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x i32>], [4 x <2 x i32>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <2 x i32>, <2 x i32>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x i32>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x i32>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x i32>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x i32>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v2i32(i8* [[TMP3]], <2 x i32> [[TMP12]], <2 x i32> [[TMP13]], <2 x i32> [[TMP14]], <2 x i32> [[TMP15]], i32 1, i32 4)
// CHECK:   ret void
void test_vst4_lane_s32(int32_t * a, int32x2x4_t b) {
  vst4_lane_s32(a, b, 1);
}

// CHECK-LABEL: @test_vst4_lane_f16(
// CHECK:   [[B:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x half>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast half* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x half> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x half> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float16x4x4_t, %struct.float16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x half>], [4 x <4 x half>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x half>, <4 x half>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x half> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x half>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x half>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x half>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x half>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4f16(i8* [[TMP3]], <4 x half> [[TMP12]], <4 x half> [[TMP13]], <4 x half> [[TMP14]], <4 x half> [[TMP15]], i32 3, i32 2)
// CHECK:   ret void
void test_vst4_lane_f16(float16_t * a, float16x4x4_t b) {
  vst4_lane_f16(a, b, 3);
}

// CHECK-LABEL: @test_vst4_lane_f32(
// CHECK:   [[B:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.float32x2x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <2 x float>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.float32x2x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.float32x2x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast float* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <2 x float> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <2 x float> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <2 x float> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.float32x2x4_t, %struct.float32x2x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <2 x float>], [4 x <2 x float>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <2 x float>, <2 x float>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <2 x float> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <2 x float>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <2 x float>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <2 x float>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <2 x float>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v2f32(i8* [[TMP3]], <2 x float> [[TMP12]], <2 x float> [[TMP13]], <2 x float> [[TMP14]], <2 x float> [[TMP15]], i32 1, i32 4)
// CHECK:   ret void
void test_vst4_lane_f32(float32_t * a, float32x2x4_t b) {
  vst4_lane_f32(a, b, 1);
}

// CHECK-LABEL: @test_vst4_lane_p8(
// CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly8x8x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly8x8x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX]], align 8
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2]], align 8
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4]], align 8
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6]], align 8
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v8i8(i8* %a, <8 x i8> [[TMP3]], <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], i32 7, i32 1)
// CHECK:   ret void
void test_vst4_lane_p8(poly8_t * a, poly8x8x4_t b) {
  vst4_lane_p8(a, b, 7);
}

// CHECK-LABEL: @test_vst4_lane_p16(
// CHECK:   [[B:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[__S1:%.*]] = alloca %struct.poly16x4x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[B]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <4 x i16>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[TMP1:%.*]] = bitcast %struct.poly16x4x4_t* [[__S1]] to i8*
// CHECK:   [[TMP2:%.*]] = bitcast %struct.poly16x4x4_t* [[B]] to i8*
// CHECK:   call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 8 [[TMP1]], i8* align 8 [[TMP2]], i32 32, i1 false)
// CHECK:   [[TMP3:%.*]] = bitcast i16* %a to i8*
// CHECK:   [[VAL:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX]], align 8
// CHECK:   [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8>
// CHECK:   [[VAL1:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL1]], i32 0, i32 1
// CHECK:   [[TMP6:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX2]], align 8
// CHECK:   [[TMP7:%.*]] = bitcast <4 x i16> [[TMP6]] to <8 x i8>
// CHECK:   [[VAL3:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL3]], i32 0, i32 2
// CHECK:   [[TMP8:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX4]], align 8
// CHECK:   [[TMP9:%.*]] = bitcast <4 x i16> [[TMP8]] to <8 x i8>
// CHECK:   [[VAL5:%.*]] = getelementptr inbounds %struct.poly16x4x4_t, %struct.poly16x4x4_t* [[__S1]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6:%.*]] = getelementptr inbounds [4 x <4 x i16>], [4 x <4 x i16>]* [[VAL5]], i32 0, i32 3
// CHECK:   [[TMP10:%.*]] = load <4 x i16>, <4 x i16>* [[ARRAYIDX6]], align 8
// CHECK:   [[TMP11:%.*]] = bitcast <4 x i16> [[TMP10]] to <8 x i8>
// CHECK:   [[TMP12:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16>
// CHECK:   [[TMP13:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16>
// CHECK:   [[TMP14:%.*]] = bitcast <8 x i8> [[TMP9]] to <4 x i16>
// CHECK:   [[TMP15:%.*]] = bitcast <8 x i8> [[TMP11]] to <4 x i16>
// CHECK:   call void @llvm.arm.neon.vst4lane.p0i8.v4i16(i8* [[TMP3]], <4 x i16> [[TMP12]], <4 x i16> [[TMP13]], <4 x i16> [[TMP14]], <4 x i16> [[TMP15]], i32 3, i32 2)
// CHECK:   ret void
void test_vst4_lane_p16(poly16_t * a, poly16x4x4_t b) {
  vst4_lane_p16(a, b, 3);
}

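// vsub_* and vsubq_* carry no target-specific intrinsic at all: both the
// 64-bit and the 128-bit forms lower directly to the generic IR 'sub'
// (or 'fsub' for the float vectors) on the matching vector type.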
// CHECK-LABEL: @test_vsub_s8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[SUB_I]]
int8x8_t test_vsub_s8(int8x8_t a, int8x8_t b) {
  return vsub_s8(a, b);
}

// CHECK-LABEL: @test_vsub_s16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[SUB_I]]
int16x4_t test_vsub_s16(int16x4_t a, int16x4_t b) {
  return vsub_s16(a, b);
}

// CHECK-LABEL: @test_vsub_s32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[SUB_I]]
int32x2_t test_vsub_s32(int32x2_t a, int32x2_t b) {
  return vsub_s32(a, b);
}

// CHECK-LABEL: @test_vsub_s64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[SUB_I]]
int64x1_t test_vsub_s64(int64x1_t a, int64x1_t b) {
  return vsub_s64(a, b);
}

// CHECK-LABEL: @test_vsub_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <2 x float> %a, %b
// CHECK:   ret <2 x float> [[SUB_I]]
float32x2_t test_vsub_f32(float32x2_t a, float32x2_t b) {
  return vsub_f32(a, b);
}

// CHECK-LABEL: @test_vsub_u8(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i8> %a, %b
// CHECK:   ret <8 x i8> [[SUB_I]]
uint8x8_t test_vsub_u8(uint8x8_t a, uint8x8_t b) {
  return vsub_u8(a, b);
}

// CHECK-LABEL: @test_vsub_u16(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i16> %a, %b
// CHECK:   ret <4 x i16> [[SUB_I]]
uint16x4_t test_vsub_u16(uint16x4_t a, uint16x4_t b) {
  return vsub_u16(a, b);
}

// CHECK-LABEL: @test_vsub_u32(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i32> %a, %b
// CHECK:   ret <2 x i32> [[SUB_I]]
uint32x2_t test_vsub_u32(uint32x2_t a, uint32x2_t b) {
  return vsub_u32(a, b);
}

// CHECK-LABEL: @test_vsub_u64(
// CHECK:   [[SUB_I:%.*]] = sub <1 x i64> %a, %b
// CHECK:   ret <1 x i64> [[SUB_I]]
uint64x1_t test_vsub_u64(uint64x1_t a, uint64x1_t b) {
  return vsub_u64(a, b);
}

// CHECK-LABEL: @test_vsubq_s8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[SUB_I]]
int8x16_t test_vsubq_s8(int8x16_t a, int8x16_t b) {
  return vsubq_s8(a, b);
}

// CHECK-LABEL: @test_vsubq_s16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubq_s16(int16x8_t a, int16x8_t b) {
  return vsubq_s16(a, b);
}

// CHECK-LABEL: @test_vsubq_s32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubq_s32(int32x4_t a, int32x4_t b) {
  return vsubq_s32(a, b);
}

// CHECK-LABEL: @test_vsubq_s64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubq_s64(int64x2_t a, int64x2_t b) {
  return vsubq_s64(a, b);
}

// CHECK-LABEL: @test_vsubq_f32(
// CHECK:   [[SUB_I:%.*]] = fsub <4 x float> %a, %b
// CHECK:   ret <4 x float> [[SUB_I]]
float32x4_t test_vsubq_f32(float32x4_t a, float32x4_t b) {
  return vsubq_f32(a, b);
}

// CHECK-LABEL: @test_vsubq_u8(
// CHECK:   [[SUB_I:%.*]] = sub <16 x i8> %a, %b
// CHECK:   ret <16 x i8> [[SUB_I]]
uint8x16_t test_vsubq_u8(uint8x16_t a, uint8x16_t b) {
  return vsubq_u8(a, b);
}

// CHECK-LABEL: @test_vsubq_u16(
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubq_u16(uint16x8_t a, uint16x8_t b) {
  return vsubq_u16(a, b);
}

// CHECK-LABEL: @test_vsubq_u32(
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubq_u32(uint32x4_t a, uint32x4_t b) {
  return vsubq_u32(a, b);
}

// CHECK-LABEL: @test_vsubq_u64(
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubq_u64(uint64x2_t a, uint64x2_t b) {
  return vsubq_u64(a, b);
}

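// vsubhn_* ("subtract and narrow, high half") is emitted as a plain wide
// 'sub' followed by a logical shift right by half the element width and a
// 'trunc' to the narrow type. For example, with 16-bit lanes a = 0x1234
// and b = 0x0034: (a - b) = 0x1200, >> 8 = 0x12, truncated to the i8
// result 0x12.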
// CHECK-LABEL: @test_vsubhn_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
int8x8_t test_vsubhn_s16(int16x8_t a, int16x8_t b) {
  return vsubhn_s16(a, b);
}

// CHECK-LABEL: @test_vsubhn_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
int16x4_t test_vsubhn_s32(int32x4_t a, int32x4_t b) {
  return vsubhn_s32(a, b);
}

// CHECK-LABEL: @test_vsubhn_s64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
int32x2_t test_vsubhn_s64(int64x2_t a, int64x2_t b) {
  return vsubhn_s64(a, b);
}

// CHECK-LABEL: @test_vsubhn_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <8 x i16> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <8 x i16> [[VSUBHN_I]], <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <8 x i16> [[VSUBHN1_I]] to <8 x i8>
// CHECK:   ret <8 x i8> [[VSUBHN2_I]]
uint8x8_t test_vsubhn_u16(uint16x8_t a, uint16x8_t b) {
  return vsubhn_u16(a, b);
}

// CHECK-LABEL: @test_vsubhn_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <4 x i32> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <4 x i32> [[VSUBHN_I]], <i32 16, i32 16, i32 16, i32 16>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <4 x i32> [[VSUBHN1_I]] to <4 x i16>
// CHECK:   ret <4 x i16> [[VSUBHN2_I]]
uint16x4_t test_vsubhn_u32(uint32x4_t a, uint32x4_t b) {
  return vsubhn_u32(a, b);
}

// CHECK-LABEL: @test_vsubhn_u64(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
// CHECK:   [[VSUBHN_I:%.*]] = sub <2 x i64> %a, %b
// CHECK:   [[VSUBHN1_I:%.*]] = lshr <2 x i64> [[VSUBHN_I]], <i64 32, i64 32>
// CHECK:   [[VSUBHN2_I:%.*]] = trunc <2 x i64> [[VSUBHN1_I]] to <2 x i32>
// CHECK:   ret <2 x i32> [[VSUBHN2_I]]
uint32x2_t test_vsubhn_u64(uint64x2_t a, uint64x2_t b) {
  return vsubhn_u64(a, b);
}

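// vsubl_* widens both operands first (sext for the signed variants, zext
// for the unsigned ones) and then performs the subtraction in the doubled
// element width, so the result cannot wrap.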
// CHECK-LABEL: @test_vsubl_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubl_s8(int8x8_t a, int8x8_t b) {
  return vsubl_s8(a, b);
}

// CHECK-LABEL: @test_vsubl_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubl_s16(int16x4_t a, int16x4_t b) {
  return vsubl_s16(a, b);
}

// CHECK-LABEL: @test_vsubl_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubl_s32(int32x2_t a, int32x2_t b) {
  return vsubl_s32(a, b);
}

// CHECK-LABEL: @test_vsubl_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %a to <8 x i16>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubl_u8(uint8x8_t a, uint8x8_t b) {
  return vsubl_u8(a, b);
}

// CHECK-LABEL: @test_vsubl_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %a to <4 x i32>
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubl_u16(uint16x4_t a, uint16x4_t b) {
  return vsubl_u16(a, b);
}

// CHECK-LABEL: @test_vsubl_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %a to <2 x i64>
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I4_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> [[VMOVL_I_I]], [[VMOVL_I4_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubl_u32(uint32x2_t a, uint32x2_t b) {
  return vsubl_u32(a, b);
}

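// vsubw_* is the asymmetric form: only the second, narrower operand is
// widened (sext/zext to match %a) before the subtraction in the wide type.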
// CHECK-LABEL: @test_vsubw_s8(
// CHECK:   [[VMOVL_I_I:%.*]] = sext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
int16x8_t test_vsubw_s8(int16x8_t a, int8x8_t b) {
  return vsubw_s8(a, b);
}

// CHECK-LABEL: @test_vsubw_s16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
int32x4_t test_vsubw_s16(int32x4_t a, int16x4_t b) {
  return vsubw_s16(a, b);
}

// CHECK-LABEL: @test_vsubw_s32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = sext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
int64x2_t test_vsubw_s32(int64x2_t a, int32x2_t b) {
  return vsubw_s32(a, b);
}

// CHECK-LABEL: @test_vsubw_u8(
// CHECK:   [[VMOVL_I_I:%.*]] = zext <8 x i8> %b to <8 x i16>
// CHECK:   [[SUB_I:%.*]] = sub <8 x i16> %a, [[VMOVL_I_I]]
// CHECK:   ret <8 x i16> [[SUB_I]]
uint16x8_t test_vsubw_u8(uint16x8_t a, uint8x8_t b) {
  return vsubw_u8(a, b);
}

// CHECK-LABEL: @test_vsubw_u16(
// CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <4 x i16> %b to <4 x i32>
// CHECK:   [[SUB_I:%.*]] = sub <4 x i32> %a, [[VMOVL_I_I]]
// CHECK:   ret <4 x i32> [[SUB_I]]
uint32x4_t test_vsubw_u16(uint32x4_t a, uint16x4_t b) {
  return vsubw_u16(a, b);
}

// CHECK-LABEL: @test_vsubw_u32(
// CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[VMOVL_I_I:%.*]] = zext <2 x i32> %b to <2 x i64>
// CHECK:   [[SUB_I:%.*]] = sub <2 x i64> %a, [[VMOVL_I_I]]
// CHECK:   ret <2 x i64> [[SUB_I]]
uint64x2_t test_vsubw_u32(uint64x2_t a, uint32x2_t b) {
  return vsubw_u32(a, b);
}

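// vtbl1_* maps to @llvm.arm.neon.vtbl1, the one-register table lookup:
// each byte of %b selects a byte of the table %a, and (per the VTBL
// instruction's semantics) indices beyond the table yield zero bytes.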
// CHECK-LABEL: @test_vtbl1_u8(
// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL1_I]]
uint8x8_t test_vtbl1_u8(uint8x8_t a, uint8x8_t b) {
  return vtbl1_u8(a, b);
}

// CHECK-LABEL: @test_vtbl1_s8(
// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL1_I]]
int8x8_t test_vtbl1_s8(int8x8_t a, int8x8_t b) {
  return vtbl1_s8(a, b);
}

// CHECK-LABEL: @test_vtbl1_p8(
// CHECK:   [[VTBL1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl1(<8 x i8> %a, <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL1_I]]
poly8x8_t test_vtbl1_p8(poly8x8_t a, uint8x8_t b) {
  return vtbl1_p8(a, b);
}

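// For the multi-register lookups (vtbl2/vtbl3/vtbl4) the table arrives as
// a homogeneous struct, so the same [N x i64] coercion-and-copy sequence
// used by the vst4_lane tests reappears before the individual <8 x i8>
// table registers are loaded and passed to the matching intrinsic.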
// CHECK-LABEL: @test_vtbl2_u8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.uint8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL2_I]]
uint8x8_t test_vtbl2_u8(uint8x8x2_t a, uint8x8_t b) {
  return vtbl2_u8(a, b);
}

// CHECK-LABEL: @test_vtbl2_s8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.int8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL2_I]]
int8x8_t test_vtbl2_s8(int8x8x2_t a, int8x8_t b) {
  return vtbl2_s8(a, b);
}

// CHECK-LABEL: @test_vtbl2_p8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.poly8x8x2_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
// CHECK:   store [2 x i64] [[A]].coerce, [2 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
// CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
// CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VTBL2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL2_I]]
poly8x8_t test_vtbl2_p8(poly8x8x2_t a, uint8x8_t b) {
  return vtbl2_p8(a, b);
}

// CHECK-LABEL: @test_vtbl3_u8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.uint8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL3_I]]
uint8x8_t test_vtbl3_u8(uint8x8x3_t a, uint8x8_t b) {
  return vtbl3_u8(a, b);
}

// CHECK-LABEL: @test_vtbl3_s8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.int8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL3_I]]
int8x8_t test_vtbl3_s8(int8x8x3_t a, int8x8_t b) {
  return vtbl3_s8(a, b);
}

// CHECK-LABEL: @test_vtbl3_p8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.poly8x8x3_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
// CHECK:   store [3 x i64] [[A]].coerce, [3 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
// CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
// CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VTBL3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl3(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL3_I]]
poly8x8_t test_vtbl3_p8(poly8x8x3_t a, uint8x8_t b) {
  return vtbl3_p8(a, b);
}

// CHECK-LABEL: @test_vtbl4_u8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.uint8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL4_I]]
uint8x8_t test_vtbl4_u8(uint8x8x4_t a, uint8x8_t b) {
  return vtbl4_u8(a, b);
}

// CHECK-LABEL: @test_vtbl4_s8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.int8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL4_I]]
int8x8_t test_vtbl4_s8(int8x8x4_t a, int8x8_t b) {
  return vtbl4_s8(a, b);
}

// CHECK-LABEL: @test_vtbl4_p8(
// CHECK:   [[__P0_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[A:%.*]] = alloca %struct.poly8x8x4_t, align 8
// CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
// CHECK:   store [4 x i64] [[A]].coerce, [4 x i64]* [[TMP0]], align 8
// CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[A]], i32 0, i32 0
// CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
// CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
// CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
// CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
// CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
// CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
// CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
// CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
// CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
// CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
// CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P0_I]], i32 0, i32 0
// CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
// CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
// CHECK:   [[VTBL4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbl4(<8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %b)
// CHECK:   ret <8 x i8> [[VTBL4_I]]
poly8x8_t test_vtbl4_p8(poly8x8x4_t a, uint8x8_t b) {
  return vtbl4_p8(a, b);
}

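// vtbx is the "extension" variant of vtbl: where an out-of-range index
// made vtbl produce a zero byte, vtbx instead keeps the corresponding byte
// of the destination operand %a. The lowering is otherwise the same, via
// the matching @llvm.arm.neon.vtbx intrinsics.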
20012 // CHECK-LABEL: @test_vtbx1_u8(
20013 // CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
20014 // CHECK:   ret <8 x i8> [[VTBX1_I]]
test_vtbx1_u8(uint8x8_t a,uint8x8_t b,uint8x8_t c)20015 uint8x8_t test_vtbx1_u8(uint8x8_t a, uint8x8_t b, uint8x8_t c) {
20016   return vtbx1_u8(a, b, c);
20017 }
20018 
20019 // CHECK-LABEL: @test_vtbx1_s8(
20020 // CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
20021 // CHECK:   ret <8 x i8> [[VTBX1_I]]
test_vtbx1_s8(int8x8_t a,int8x8_t b,int8x8_t c)20022 int8x8_t test_vtbx1_s8(int8x8_t a, int8x8_t b, int8x8_t c) {
20023   return vtbx1_s8(a, b, c);
20024 }
20025 
20026 // CHECK-LABEL: @test_vtbx1_p8(
20027 // CHECK:   [[VTBX1_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c)
20028 // CHECK:   ret <8 x i8> [[VTBX1_I]]
test_vtbx1_p8(poly8x8_t a,poly8x8_t b,uint8x8_t c)20029 poly8x8_t test_vtbx1_p8(poly8x8_t a, poly8x8_t b, uint8x8_t c) {
20030   return vtbx1_p8(a, b, c);
20031 }
20032 
20033 // CHECK-LABEL: @test_vtbx2_u8(
20034 // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
20035 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x2_t, align 8
20036 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
20037 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
20038 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
20039 // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[B]], i32 0, i32 0
20040 // CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
20041 // CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
20042 // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
20043 // CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
20044 // CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
20045 // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
20046 // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20047 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20048 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[__P1_I]], i32 0, i32 0
20049 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20050 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20051 // CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
20052 // CHECK:   ret <8 x i8> [[VTBX2_I]]
20053 uint8x8_t test_vtbx2_u8(uint8x8_t a, uint8x8x2_t b, uint8x8_t c) {
20054   return vtbx2_u8(a, b, c);
20055 }
20056 
20057 // CHECK-LABEL: @test_vtbx2_s8(
20058 // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x8x2_t, align 8
20059 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x2_t, align 8
20060 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
20061 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
20062 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
20063 // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[B]], i32 0, i32 0
20064 // CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
20065 // CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
20066 // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
20067 // CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
20068 // CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
20069 // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
20070 // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20071 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20072 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[__P1_I]], i32 0, i32 0
20073 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20074 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20075 // CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
20076 // CHECK:   ret <8 x i8> [[VTBX2_I]]
20077 int8x8_t test_vtbx2_s8(int8x8_t a, int8x8x2_t b, int8x8_t c) {
20078   return vtbx2_s8(a, b, c);
20079 }
20080 
20081 // CHECK-LABEL: @test_vtbx2_p8(
20082 // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
20083 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x2_t, align 8
20084 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
20085 // CHECK:   [[TMP0:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE]] to [2 x i64]*
20086 // CHECK:   store [2 x i64] [[B]].coerce, [2 x i64]* [[TMP0]], align 8
20087 // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[B]], i32 0, i32 0
20088 // CHECK:   [[TMP1:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE1]] to [2 x i64]*
20089 // CHECK:   [[TMP2:%.*]] = load [2 x i64], [2 x i64]* [[TMP1]], align 8
20090 // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
20091 // CHECK:   [[TMP3:%.*]] = bitcast [2 x <8 x i8>]* [[COERCE_DIVE_I]] to [2 x i64]*
20092 // CHECK:   store [2 x i64] [[TMP2]], [2 x i64]* [[TMP3]], align 8
20093 // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
20094 // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20095 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20096 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[__P1_I]], i32 0, i32 0
20097 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [2 x <8 x i8>], [2 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20098 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20099 // CHECK:   [[VTBX2_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx2(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> %c)
20100 // CHECK:   ret <8 x i8> [[VTBX2_I]]
20101 poly8x8_t test_vtbx2_p8(poly8x8_t a, poly8x8x2_t b, uint8x8_t c) {
20102   return vtbx2_p8(a, b, c);
20103 }
20104 
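// The two-register form covers a 16-byte table passed as a uint8x8x2_t,
// which the checks above show being coerced through [2 x i64]. A minimal
// sketch (hypothetical helper, assumed lane layout):
static inline uint8x8_t vtbx2_sketch(uint8x8_t acc, uint8x8_t lo, uint8x8_t hi,
                                     uint8x8_t indices) {
  uint8x8x2_t table = { { lo, hi } };    // table bytes 0-7 in lo, 8-15 in hi
  return vtbx2_u8(acc, table, indices);  // idx >= 16 keeps the `acc` lane
}
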
20105 // CHECK-LABEL: @test_vtbx3_u8(
20106 // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x8x3_t, align 8
20107 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x3_t, align 8
20108 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
20109 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
20110 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20111 // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[B]], i32 0, i32 0
20112 // CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
20113 // CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
20114 // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20115 // CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
20116 // CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
20117 // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20118 // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20119 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20120 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20121 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20122 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20123 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x3_t, %struct.uint8x8x3_t* [[__P1_I]], i32 0, i32 0
20124 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20125 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20126 // CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
20127 // CHECK:   ret <8 x i8> [[VTBX3_I]]
20128 uint8x8_t test_vtbx3_u8(uint8x8_t a, uint8x8x3_t b, uint8x8_t c) {
20129   return vtbx3_u8(a, b, c);
20130 }
20131 
20132 // CHECK-LABEL: @test_vtbx3_s8(
20133 // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x8x3_t, align 8
20134 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x3_t, align 8
20135 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
20136 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
20137 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20138 // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[B]], i32 0, i32 0
20139 // CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
20140 // CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
20141 // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20142 // CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
20143 // CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
20144 // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20145 // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20146 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20147 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20148 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20149 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20150 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x3_t, %struct.int8x8x3_t* [[__P1_I]], i32 0, i32 0
20151 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20152 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20153 // CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
20154 // CHECK:   ret <8 x i8> [[VTBX3_I]]
20155 int8x8_t test_vtbx3_s8(int8x8_t a, int8x8x3_t b, int8x8_t c) {
20156   return vtbx3_s8(a, b, c);
20157 }
20158 
20159 // CHECK-LABEL: @test_vtbx3_p8(
20160 // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x8x3_t, align 8
20161 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x3_t, align 8
20162 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
20163 // CHECK:   [[TMP0:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE]] to [3 x i64]*
20164 // CHECK:   store [3 x i64] [[B]].coerce, [3 x i64]* [[TMP0]], align 8
20165 // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[B]], i32 0, i32 0
20166 // CHECK:   [[TMP1:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE1]] to [3 x i64]*
20167 // CHECK:   [[TMP2:%.*]] = load [3 x i64], [3 x i64]* [[TMP1]], align 8
20168 // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20169 // CHECK:   [[TMP3:%.*]] = bitcast [3 x <8 x i8>]* [[COERCE_DIVE_I]] to [3 x i64]*
20170 // CHECK:   store [3 x i64] [[TMP2]], [3 x i64]* [[TMP3]], align 8
20171 // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20172 // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20173 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20174 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20175 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20176 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20177 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x3_t, %struct.poly8x8x3_t* [[__P1_I]], i32 0, i32 0
20178 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [3 x <8 x i8>], [3 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20179 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20180 // CHECK:   [[VTBX3_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx3(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> %c)
20181 // CHECK:   ret <8 x i8> [[VTBX3_I]]
20182 poly8x8_t test_vtbx3_p8(poly8x8_t a, poly8x8x3_t b, uint8x8_t c) {
20183   return vtbx3_p8(a, b, c);
20184 }
20185 
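// The three-register form extends the table to 24 bytes; as the checks above
// show, the extra element only adds one more per-lane reload before the
// @llvm.arm.neon.vtbx3 call. Sketch (hypothetical helper):
static inline int8x8_t vtbx3_sketch(int8x8_t acc, int8x8x3_t table,
                                    int8x8_t indices) {
  return vtbx3_s8(acc, table, indices);  // idx outside [0, 23] keeps `acc`
}
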
20186 // CHECK-LABEL: @test_vtbx4_u8(
20187 // CHECK:   [[__P1_I:%.*]] = alloca %struct.uint8x8x4_t, align 8
20188 // CHECK:   [[B:%.*]] = alloca %struct.uint8x8x4_t, align 8
20189 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
20190 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
20191 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20192 // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[B]], i32 0, i32 0
20193 // CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
20194 // CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
20195 // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20196 // CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
20197 // CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
20198 // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20199 // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20200 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20201 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20202 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20203 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20204 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20205 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20206 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20207 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.uint8x8x4_t, %struct.uint8x8x4_t* [[__P1_I]], i32 0, i32 0
20208 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
20209 // CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
20210 // CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
20211 // CHECK:   ret <8 x i8> [[VTBX4_I]]
20212 uint8x8_t test_vtbx4_u8(uint8x8_t a, uint8x8x4_t b, uint8x8_t c) {
20213   return vtbx4_u8(a, b, c);
20214 }
20215 
20216 // CHECK-LABEL: @test_vtbx4_s8(
20217 // CHECK:   [[__P1_I:%.*]] = alloca %struct.int8x8x4_t, align 8
20218 // CHECK:   [[B:%.*]] = alloca %struct.int8x8x4_t, align 8
20219 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
20220 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
20221 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20222 // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[B]], i32 0, i32 0
20223 // CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
20224 // CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
20225 // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20226 // CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
20227 // CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
20228 // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20229 // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20230 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20231 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20232 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20233 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20234 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20235 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20236 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20237 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.int8x8x4_t, %struct.int8x8x4_t* [[__P1_I]], i32 0, i32 0
20238 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
20239 // CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
20240 // CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
20241 // CHECK:   ret <8 x i8> [[VTBX4_I]]
20242 int8x8_t test_vtbx4_s8(int8x8_t a, int8x8x4_t b, int8x8_t c) {
20243   return vtbx4_s8(a, b, c);
20244 }
20245 
20246 // CHECK-LABEL: @test_vtbx4_p8(
20247 // CHECK:   [[__P1_I:%.*]] = alloca %struct.poly8x8x4_t, align 8
20248 // CHECK:   [[B:%.*]] = alloca %struct.poly8x8x4_t, align 8
20249 // CHECK:   [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
20250 // CHECK:   [[TMP0:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE]] to [4 x i64]*
20251 // CHECK:   store [4 x i64] [[B]].coerce, [4 x i64]* [[TMP0]], align 8
20252 // CHECK:   [[COERCE_DIVE1:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[B]], i32 0, i32 0
20253 // CHECK:   [[TMP1:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE1]] to [4 x i64]*
20254 // CHECK:   [[TMP2:%.*]] = load [4 x i64], [4 x i64]* [[TMP1]], align 8
20255 // CHECK:   [[COERCE_DIVE_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20256 // CHECK:   [[TMP3:%.*]] = bitcast [4 x <8 x i8>]* [[COERCE_DIVE_I]] to [4 x i64]*
20257 // CHECK:   store [4 x i64] [[TMP2]], [4 x i64]* [[TMP3]], align 8
20258 // CHECK:   [[VAL_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20259 // CHECK:   [[ARRAYIDX_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL_I]], i32 0, i32 0
20260 // CHECK:   [[TMP4:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX_I]], align 8
20261 // CHECK:   [[VAL1_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20262 // CHECK:   [[ARRAYIDX2_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL1_I]], i32 0, i32 1
20263 // CHECK:   [[TMP5:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX2_I]], align 8
20264 // CHECK:   [[VAL3_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20265 // CHECK:   [[ARRAYIDX4_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL3_I]], i32 0, i32 2
20266 // CHECK:   [[TMP6:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX4_I]], align 8
20267 // CHECK:   [[VAL5_I:%.*]] = getelementptr inbounds %struct.poly8x8x4_t, %struct.poly8x8x4_t* [[__P1_I]], i32 0, i32 0
20268 // CHECK:   [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [4 x <8 x i8>], [4 x <8 x i8>]* [[VAL5_I]], i32 0, i32 3
20269 // CHECK:   [[TMP7:%.*]] = load <8 x i8>, <8 x i8>* [[ARRAYIDX6_I]], align 8
20270 // CHECK:   [[VTBX4_I:%.*]] = call <8 x i8> @llvm.arm.neon.vtbx4(<8 x i8> %a, <8 x i8> [[TMP4]], <8 x i8> [[TMP5]], <8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> %c)
20271 // CHECK:   ret <8 x i8> [[VTBX4_I]]
20272 poly8x8_t test_vtbx4_p8(poly8x8_t a, poly8x8x4_t b, uint8x8_t c) {
20273   return vtbx4_p8(a, b, c);
20274 }
20275 
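// At four registers the table reaches its architectural maximum of 32 bytes.
// Sketch of the corresponding C-level call (hypothetical helper, not
// exercised by the checks):
static inline uint8x8_t vtbx4_sketch(uint8x8_t acc, uint8x8x4_t table,
                                     uint8x8_t indices) {
  return vtbx4_u8(acc, table, indices);  // idx outside [0, 31] keeps `acc`
}
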
20276 // CHECK: @test_vtrn_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20277 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
20278 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20279 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20280 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !3
20281 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20282 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20283 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !3
20284 // CHECK:   ret void
20285 int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
20286   return vtrn_s8(a, b);
20287 }
20288 
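// The vtrn tests below all follow the pattern checked above: the two-vector
// result is returned indirectly through the sret pointer, and each half is a
// single shufflevector. Semantically, vtrn interleaves the even and the odd
// lanes of its operands; a sketch with concrete lanes (hypothetical helper):
static inline int8x8x2_t vtrn_sketch(int8x8_t a, int8x8_t b) {
  // val[0] = {a0,b0,a2,b2,a4,b4,a6,b6}, val[1] = {a1,b1,a3,b3,a5,b5,a7,b7}
  return vtrn_s8(a, b);
}
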
20289 // CHECK: @test_vtrn_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20290 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
20291 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20292 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20293 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20294 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20295 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !6
20296 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20297 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20298 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !6
20299 // CHECK:   ret void
20300 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
20301   return vtrn_s16(a, b);
20302 }
20303 
20304 // CHECK: @test_vtrn_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20305 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
20306 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20307 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20308 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20309 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20310 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !9
20311 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20312 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20313 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !9
20314 // CHECK:   ret void
20315 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
20316   return vtrn_s32(a, b);
20317 }
20318 
20319 // CHECK: @test_vtrn_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20320 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
20321 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20322 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20323 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !12
20324 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20325 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20326 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !12
20327 // CHECK:   ret void
20328 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
20329   return vtrn_u8(a, b);
20330 }
20331 
20332 // CHECK: @test_vtrn_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20333 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
20334 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20335 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20336 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20337 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20338 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !15
20339 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20340 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20341 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !15
20342 // CHECK:   ret void
20343 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
20344   return vtrn_u16(a, b);
20345 }
20346 
20347 // CHECK: @test_vtrn_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20348 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
20349 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20350 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20351 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20352 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20353 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !18
20354 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20355 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20356 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !18
20357 // CHECK:   ret void
20358 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
20359   return vtrn_u32(a, b);
20360 }
20361 
20362 // CHECK: @test_vtrn_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20363 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
20364 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
20365 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
20366 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
20367 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
20368 // CHECK:   store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !21
20369 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
20370 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
20371 // CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !21
20372 // CHECK:   ret void
20373 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
20374   return vtrn_f32(a, b);
20375 }
20376 
20377 // CHECK: @test_vtrn_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20378 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
20379 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20380 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20381 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !24
20382 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20383 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20384 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !24
20385 // CHECK:   ret void
20386 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
20387   return vtrn_p8(a, b);
20388 }
20389 
20390 // CHECK: @test_vtrn_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20391 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
20392 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20393 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20394 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20395 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20396 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !27
20397 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20398 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20399 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !27
20400 // CHECK:   ret void
20401 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
20402   return vtrn_p16(a, b);
20403 }
20404 
20405 // CHECK: @test_vtrnq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20406 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
20407 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20408 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
20409 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !30
20410 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20411 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
20412 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !30
20413 // CHECK:   ret void
20414 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
20415   return vtrnq_s8(a, b);
20416 }
20417 
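// The q forms apply the same even/odd interleave to 16-byte registers; a
// pair of vtrnq calls is a common building block for an in-register matrix
// transpose. Sketch (hypothetical helper):
static inline int16x8x2_t vtrnq_sketch(int16x8_t row0, int16x8_t row1) {
  return vtrnq_s16(row0, row1);  // transposes the 2x2 lane blocks of two rows
}
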
20418 // CHECK: @test_vtrnq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20419 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
20420 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20421 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20422 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20423 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20424 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !33
20425 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20426 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20427 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !33
20428 // CHECK:   ret void
20429 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
20430   return vtrnq_s16(a, b);
20431 }
20432 
20433 // CHECK: @test_vtrnq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20434 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
20435 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20436 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20437 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
20438 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20439 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !36
20440 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
20441 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20442 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !36
20443 // CHECK:   ret void
20444 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
20445   return vtrnq_s32(a, b);
20446 }
20447 
20448 // CHECK: @test_vtrnq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20449 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
20450 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20451 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
20452 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !39
20453 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20454 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
20455 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !39
20456 // CHECK:   ret void
20457 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
20458   return vtrnq_u8(a, b);
20459 }
20460 
20461 // CHECK: @test_vtrnq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20462 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
20463 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20464 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20465 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20466 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20467 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !42
20468 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20469 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20470 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !42
20471 // CHECK:   ret void
20472 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
20473   return vtrnq_u16(a, b);
20474 }
20475 
20476 // CHECK: @test_vtrnq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20477 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
20478 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20479 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20480 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
20481 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20482 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !45
20483 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
20484 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20485 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !45
20486 // CHECK:   ret void
20487 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
20488   return vtrnq_u32(a, b);
20489 }
20490 
20491 // CHECK: @test_vtrnq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20492 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
20493 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
20494 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
20495 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
20496 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
20497 // CHECK:   store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !48
20498 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
20499 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
20500 // CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !48
20501 // CHECK:   ret void
20502 float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
20503   return vtrnq_f32(a, b);
20504 }
20505 
20506 // CHECK: @test_vtrnq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20507 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
20508 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
20509 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
20510 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !51
20511 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
20512 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
20513 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !51
20514 // CHECK:   ret void
20515 poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
20516   return vtrnq_p8(a, b);
20517 }
20518 
20519 // CHECK: @test_vtrnq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20520 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
20521 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20522 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20523 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
20524 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
20525 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !54
20526 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
20527 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
20528 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !54
20529 // CHECK:   ret void
20530 poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
20531   return vtrnq_p16(a, b);
20532 }
20533 
20534 // CHECK-LABEL: @test_vtst_s8(
20535 // CHECK:   [[TMP0:%.*]] = and <8 x i8> %a, %b
20536 // CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
20537 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
20538 // CHECK:   ret <8 x i8> [[VTST_I]]
20539 uint8x8_t test_vtst_s8(int8x8_t a, int8x8_t b) {
20540   return vtst_s8(a, b);
20541 }
20542 
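// As the `and`/`icmp ne`/`sext` sequence above shows, vtst needs no target
// intrinsic call: it tests (a & b) != 0 per lane and widens the i1 result to
// an all-ones or all-zero lane. Note that the result is always an unsigned
// mask type, even for signed operands. Sketch (hypothetical helper):
static inline uint8x8_t vtst_sketch(uint8x8_t flags, uint8_t mask) {
  return vtst_u8(flags, vdup_n_u8(mask));  // 0xff where any masked bit is set
}
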
20543 // CHECK-LABEL: @test_vtst_s16(
20544 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20545 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20546 // CHECK:   [[TMP2:%.*]] = and <4 x i16> %a, %b
20547 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
20548 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
20549 // CHECK:   ret <4 x i16> [[VTST_I]]
20550 uint16x4_t test_vtst_s16(int16x4_t a, int16x4_t b) {
20551   return vtst_s16(a, b);
20552 }
20553 
20554 // CHECK-LABEL: @test_vtst_s32(
20555 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20556 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20557 // CHECK:   [[TMP2:%.*]] = and <2 x i32> %a, %b
20558 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
20559 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
20560 // CHECK:   ret <2 x i32> [[VTST_I]]
20561 uint32x2_t test_vtst_s32(int32x2_t a, int32x2_t b) {
20562   return vtst_s32(a, b);
20563 }
20564 
20565 // CHECK-LABEL: @test_vtst_u8(
20566 // CHECK:   [[TMP0:%.*]] = and <8 x i8> %a, %b
20567 // CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
20568 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
20569 // CHECK:   ret <8 x i8> [[VTST_I]]
20570 uint8x8_t test_vtst_u8(uint8x8_t a, uint8x8_t b) {
20571   return vtst_u8(a, b);
20572 }
20573 
20574 // CHECK-LABEL: @test_vtst_u16(
20575 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20576 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20577 // CHECK:   [[TMP2:%.*]] = and <4 x i16> %a, %b
20578 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
20579 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
20580 // CHECK:   ret <4 x i16> [[VTST_I]]
20581 uint16x4_t test_vtst_u16(uint16x4_t a, uint16x4_t b) {
20582   return vtst_u16(a, b);
20583 }
20584 
20585 // CHECK-LABEL: @test_vtst_u32(
20586 // CHECK:   [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20587 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20588 // CHECK:   [[TMP2:%.*]] = and <2 x i32> %a, %b
20589 // CHECK:   [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
20590 // CHECK:   [[VTST_I:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
20591 // CHECK:   ret <2 x i32> [[VTST_I]]
20592 uint32x2_t test_vtst_u32(uint32x2_t a, uint32x2_t b) {
20593   return vtst_u32(a, b);
20594 }
20595 
20596 // CHECK-LABEL: @test_vtst_p8(
20597 // CHECK:   [[TMP0:%.*]] = and <8 x i8> %a, %b
20598 // CHECK:   [[TMP1:%.*]] = icmp ne <8 x i8> [[TMP0]], zeroinitializer
20599 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i8>
20600 // CHECK:   ret <8 x i8> [[VTST_I]]
20601 uint8x8_t test_vtst_p8(poly8x8_t a, poly8x8_t b) {
20602   return vtst_p8(a, b);
20603 }
20604 
20605 // CHECK-LABEL: @test_vtst_p16(
20606 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20607 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20608 // CHECK:   [[TMP2:%.*]] = and <4 x i16> %a, %b
20609 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i16> [[TMP2]], zeroinitializer
20610 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i16>
20611 // CHECK:   ret <4 x i16> [[VTST_I]]
20612 uint16x4_t test_vtst_p16(poly16x4_t a, poly16x4_t b) {
20613   return vtst_p16(a, b);
20614 }
20615 
20616 // CHECK-LABEL: @test_vtstq_s8(
20617 // CHECK:   [[TMP0:%.*]] = and <16 x i8> %a, %b
20618 // CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
20619 // CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
20620 // CHECK:   ret <16 x i8> [[VTST_I]]
20621 uint8x16_t test_vtstq_s8(int8x16_t a, int8x16_t b) {
20622   return vtstq_s8(a, b);
20623 }
20624 
20625 // CHECK-LABEL: @test_vtstq_s16(
20626 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20627 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20628 // CHECK:   [[TMP2:%.*]] = and <8 x i16> %a, %b
20629 // CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
20630 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
20631 // CHECK:   ret <8 x i16> [[VTST_I]]
20632 uint16x8_t test_vtstq_s16(int16x8_t a, int16x8_t b) {
20633   return vtstq_s16(a, b);
20634 }
20635 
20636 // CHECK-LABEL: @test_vtstq_s32(
20637 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20638 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20639 // CHECK:   [[TMP2:%.*]] = and <4 x i32> %a, %b
20640 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
20641 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
20642 // CHECK:   ret <4 x i32> [[VTST_I]]
20643 uint32x4_t test_vtstq_s32(int32x4_t a, int32x4_t b) {
20644   return vtstq_s32(a, b);
20645 }
20646 
20647 // CHECK-LABEL: @test_vtstq_u8(
20648 // CHECK:   [[TMP0:%.*]] = and <16 x i8> %a, %b
20649 // CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
20650 // CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
20651 // CHECK:   ret <16 x i8> [[VTST_I]]
20652 uint8x16_t test_vtstq_u8(uint8x16_t a, uint8x16_t b) {
20653   return vtstq_u8(a, b);
20654 }
20655 
20656 // CHECK-LABEL: @test_vtstq_u16(
20657 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20658 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20659 // CHECK:   [[TMP2:%.*]] = and <8 x i16> %a, %b
20660 // CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
20661 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
20662 // CHECK:   ret <8 x i16> [[VTST_I]]
20663 uint16x8_t test_vtstq_u16(uint16x8_t a, uint16x8_t b) {
20664   return vtstq_u16(a, b);
20665 }
20666 
20667 // CHECK-LABEL: @test_vtstq_u32(
20668 // CHECK:   [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8>
20669 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %b to <16 x i8>
20670 // CHECK:   [[TMP2:%.*]] = and <4 x i32> %a, %b
20671 // CHECK:   [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
20672 // CHECK:   [[VTST_I:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
20673 // CHECK:   ret <4 x i32> [[VTST_I]]
20674 uint32x4_t test_vtstq_u32(uint32x4_t a, uint32x4_t b) {
20675   return vtstq_u32(a, b);
20676 }
20677 
20678 // CHECK-LABEL: @test_vtstq_p8(
20679 // CHECK:   [[TMP0:%.*]] = and <16 x i8> %a, %b
20680 // CHECK:   [[TMP1:%.*]] = icmp ne <16 x i8> [[TMP0]], zeroinitializer
20681 // CHECK:   [[VTST_I:%.*]] = sext <16 x i1> [[TMP1]] to <16 x i8>
20682 // CHECK:   ret <16 x i8> [[VTST_I]]
20683 uint8x16_t test_vtstq_p8(poly8x16_t a, poly8x16_t b) {
20684   return vtstq_p8(a, b);
20685 }
20686 
20687 // CHECK-LABEL: @test_vtstq_p16(
20688 // CHECK:   [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8>
20689 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %b to <16 x i8>
20690 // CHECK:   [[TMP2:%.*]] = and <8 x i16> %a, %b
20691 // CHECK:   [[TMP3:%.*]] = icmp ne <8 x i16> [[TMP2]], zeroinitializer
20692 // CHECK:   [[VTST_I:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i16>
20693 // CHECK:   ret <8 x i16> [[VTST_I]]
20694 uint16x8_t test_vtstq_p16(poly16x8_t a, poly16x8_t b) {
20695   return vtstq_p16(a, b);
20696 }
20697 
20698 // CHECK: @test_vuzp_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20699 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
20700 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20701 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20702 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !57
20703 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20704 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20705 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !57
20706 // CHECK:   ret void
20707 int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
20708   return vuzp_s8(a, b);
20709 }
20710 
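// vuzp is the de-interleaving counterpart of vzip: val[0] gathers the
// even-indexed elements of the concatenated operands and val[1] the
// odd-indexed ones, matching the two shuffle masks checked above. Sketch
// (hypothetical helper), e.g. splitting interleaved left/right samples:
static inline int16x4x2_t vuzp_sketch(int16x4_t lo, int16x4_t hi) {
  return vuzp_s16(lo, hi);  // val[0] = even lanes, val[1] = odd lanes
}
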
20711 // CHECK: @test_vuzp_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20712 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
20713 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20714 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20715 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20716 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20717 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !60
20718 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20719 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20720 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !60
20721 // CHECK:   ret void
20722 int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
20723   return vuzp_s16(a, b);
20724 }
20725 
20726 // CHECK: @test_vuzp_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20727 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
20728 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20729 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20730 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20731 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20732 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !63
20733 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20734 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20735 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !63
20736 // CHECK:   ret void
20737 int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
20738   return vuzp_s32(a, b);
20739 }
20740 
20741 // CHECK: @test_vuzp_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20742 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
20743 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20744 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20745 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !66
20746 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20747 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20748 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !66
20749 // CHECK:   ret void
20750 uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
20751   return vuzp_u8(a, b);
20752 }
20753 
20754 // CHECK: @test_vuzp_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20755 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
20756 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20757 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20758 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20759 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20760 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !69
20761 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20762 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20763 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !69
20764 // CHECK:   ret void
20765 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
20766   return vuzp_u16(a, b);
20767 }
20768 
20769 // CHECK: @test_vuzp_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20770 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
20771 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
20772 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
20773 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
20774 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
20775 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !72
20776 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
20777 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
20778 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !72
20779 // CHECK:   ret void
20780 uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
20781   return vuzp_u32(a, b);
20782 }
20783 
20784 // CHECK: @test_vuzp_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20785 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
20786 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
20787 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
20788 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
20789 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
20790 // CHECK:   store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !75
20791 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
20792 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
20793 // CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !75
20794 // CHECK:   ret void
20795 float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
20796   return vuzp_f32(a, b);
20797 }
20798 
20799 // CHECK: @test_vuzp_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20800 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
20801 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
20802 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
20803 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !78
20804 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
20805 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
20806 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !78
20807 // CHECK:   ret void
20808 poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
20809   return vuzp_p8(a, b);
20810 }
20811 
20812 // CHECK: @test_vuzp_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
20813 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
20814 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
20815 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
20816 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
20817 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
20818 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !81
20819 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
20820 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
20821 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !81
20822 // CHECK:   ret void
20823 poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
20824   return vuzp_p16(a, b);
20825 }
20826 
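// The vuzp tests above and the q-register variants below all check the same
// de-interleave pattern: the first shufflevector gathers the even-indexed
// lanes of the concatenation a:b, the second gathers the odd-indexed lanes,
// and the two halves are stored into consecutive slots of the sret struct.
// As a rough illustration, the hypothetical helper below (not part of the
// checked output; an unused static inline function emits no IR) shows the
// lane layout for vuzp_f32:
static inline float32x2_t demo_vuzp_f32_even(float32x2_t a, float32x2_t b) {
  float32x2x2_t r = vuzp_f32(a, b);
  // r.val[0] = {a[0], b[0]} (even lanes); r.val[1] = {a[1], b[1]} (odd lanes)
  return r.val[0];
}
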
// CHECK: @test_vuzpq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !84
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !84
// CHECK:   ret void
int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
  return vuzpq_s8(a, b);
}

// CHECK: @test_vuzpq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !87
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !87
// CHECK:   ret void
int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
  return vuzpq_s16(a, b);
}

// CHECK: @test_vuzpq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !90
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !90
// CHECK:   ret void
int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
  return vuzpq_s32(a, b);
}

// CHECK: @test_vuzpq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !93
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !93
// CHECK:   ret void
uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
  return vuzpq_u8(a, b);
}

// CHECK: @test_vuzpq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !96
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !96
// CHECK:   ret void
uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
  return vuzpq_u16(a, b);
}

// CHECK: @test_vuzpq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !99
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !99
// CHECK:   ret void
uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
  return vuzpq_u32(a, b);
}

// CHECK: @test_vuzpq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
// CHECK:   store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !102
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !102
// CHECK:   ret void
float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
  return vuzpq_f32(a, b);
}

// CHECK: @test_vuzpq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
// CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !105
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
// CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !105
// CHECK:   ret void
poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
  return vuzpq_p8(a, b);
}

// CHECK: @test_vuzpq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
// CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !108
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
// CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !108
// CHECK:   ret void
poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
  return vuzpq_p16(a, b);
}

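// The q-register vuzp tests follow the identical recipe, just with 128-bit
// vectors and align 16 on the sret pointer. A hypothetical sketch (again
// unused static inline, so it contributes nothing to the checked IR) of
// extracting the even-indexed bytes of two q registers:
static inline uint8x16_t demo_vuzpq_u8_even(uint8x16_t a, uint8x16_t b) {
  uint8x16x2_t r = vuzpq_u8(a, b);
  // r.val[0] holds lanes 0,2,...,30 of a:b; r.val[1] holds lanes 1,3,...,31.
  return r.val[0];
}
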
// CHECK: @test_vzip_s8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !111
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !111
// CHECK:   ret void
int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
  return vzip_s8(a, b);
}

// CHECK: @test_vzip_s16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !114
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !114
// CHECK:   ret void
int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
  return vzip_s16(a, b);
}

// CHECK: @test_vzip_s32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !117
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !117
// CHECK:   ret void
int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
  return vzip_s32(a, b);
}

// CHECK: @test_vzip_u8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !120
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !120
// CHECK:   ret void
uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
  return vzip_u8(a, b);
}

// CHECK: @test_vzip_u16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !123
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !123
// CHECK:   ret void
uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
  return vzip_u16(a, b);
}

// CHECK: @test_vzip_u32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
// CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]], align 4, !alias.scope !126
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
// CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]], align 4, !alias.scope !126
// CHECK:   ret void
uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
  return vzip_u32(a, b);
}

// CHECK: @test_vzip_f32({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
// CHECK:   store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]], align 4, !alias.scope !129
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
// CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]], align 4, !alias.scope !129
// CHECK:   ret void
float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
  return vzip_f32(a, b);
}

// CHECK: @test_vzip_p8({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]], align 4, !alias.scope !132
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]], align 4, !alias.scope !132
// CHECK:   ret void
poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
  return vzip_p8(a, b);
}

// CHECK: @test_vzip_p16({{.*}} sret({{.*}}) align 8 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]], align 4, !alias.scope !135
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]], align 4, !alias.scope !135
// CHECK:   ret void
poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
  return vzip_p16(a, b);
}

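// vzip is the inverse of vuzp: the shuffle masks interleave rather than
// de-interleave, so val[0] receives the zipped low halves of the operands
// and val[1] the zipped high halves. A hypothetical, unused sketch of the
// lane layout for vzip_s16 (again not part of the checked output):
static inline int16x4_t demo_vzip_s16_low(int16x4_t a, int16x4_t b) {
  int16x4x2_t r = vzip_s16(a, b);
  // r.val[0] = {a[0], b[0], a[1], b[1]}; r.val[1] = {a[2], b[2], a[3], b[3]}
  return r.val[0];
}
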
// CHECK: @test_vzipq_s8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !138
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !138
// CHECK:   ret void
int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
  return vzipq_s8(a, b);
}

// CHECK: @test_vzipq_s16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !141
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !141
// CHECK:   ret void
int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
  return vzipq_s16(a, b);
}

// CHECK: @test_vzipq_s32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !144
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !144
// CHECK:   ret void
int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
  return vzipq_s32(a, b);
}

// CHECK: @test_vzipq_u8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !147
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !147
// CHECK:   ret void
uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
  return vzipq_u8(a, b);
}

// CHECK: @test_vzipq_u16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !150
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !150
// CHECK:   ret void
uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
  return vzipq_u16(a, b);
}

// CHECK: @test_vzipq_u32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]], align 4, !alias.scope !153
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]], align 4, !alias.scope !153
// CHECK:   ret void
uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
  return vzipq_u32(a, b);
}

// CHECK: @test_vzipq_f32({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
// CHECK:   store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]], align 4, !alias.scope !156
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
// CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]], align 4, !alias.scope !156
// CHECK:   ret void
float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
  return vzipq_f32(a, b);
}

// CHECK: @test_vzipq_p8({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
// CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]], align 4, !alias.scope !159
// CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
// CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]], align 4, !alias.scope !159
// CHECK:   ret void
poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
  return vzipq_p8(a, b);
}

// CHECK: @test_vzipq_p16({{.*}} sret({{.*}}) align 16 [[AGG_RESULT:%[0-9a-zA-Z.]+]],
// CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[AGG_RESULT]] to i8*
// CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
// CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
// CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
// CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
// CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]], align 4, !alias.scope !162
// CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
// CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
// CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]], align 4, !alias.scope !162
// CHECK:   ret void
poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
  return vzipq_p16(a, b);
}

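// The 128-bit zips mirror their 64-bit counterparts with wider masks. A
// hypothetical, unused sketch showing that vzipq_u8 interleaves the low
// eight bytes of each operand into val[0] and the high eight into val[1]:
static inline uint8x16_t demo_vzipq_u8_low(uint8x16_t a, uint8x16_t b) {
  uint8x16x2_t r = vzipq_u8(a, b);
  // r.val[0] = {a[0], b[0], ..., a[7], b[7]}
  // r.val[1] = {a[8], b[8], ..., a[15], b[15]}
  return r.val[0];
}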