1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2 // RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
3 
4 // Test new aarch64 intrinsics and types
5 #include <arm_neon.h>
6 
7 // CHECK-LABEL: @test_vuzp1_s8(
8 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
9 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp1_s8; keep the names a/b, the directives above match %a/%b.
int8x8_t test_vuzp1_s8(int8x8_t a, int8x8_t b) {
  return vuzp1_s8(a, b);
}
13 
14 // CHECK-LABEL: @test_vuzp1q_s8(
15 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
16 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_s8; keep the names a/b, the directives above match %a/%b.
int8x16_t test_vuzp1q_s8(int8x16_t a, int8x16_t b) {
  return vuzp1q_s8(a, b);
}
20 
21 // CHECK-LABEL: @test_vuzp1_s16(
22 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
23 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp1_s16; keep the names a/b, the directives above match %a/%b.
int16x4_t test_vuzp1_s16(int16x4_t a, int16x4_t b) {
  return vuzp1_s16(a, b);
}
27 
28 // CHECK-LABEL: @test_vuzp1q_s16(
29 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
30 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_s16; keep the names a/b, the directives above match %a/%b.
int16x8_t test_vuzp1q_s16(int16x8_t a, int16x8_t b) {
  return vuzp1q_s16(a, b);
}
34 
35 // CHECK-LABEL: @test_vuzp1_s32(
36 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
37 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper over vuzp1_s32; keep the names a/b, the directives above match %a/%b.
int32x2_t test_vuzp1_s32(int32x2_t a, int32x2_t b) {
  return vuzp1_s32(a, b);
}
41 
42 // CHECK-LABEL: @test_vuzp1q_s32(
43 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
44 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_s32; keep the names a/b, the directives above match %a/%b.
int32x4_t test_vuzp1q_s32(int32x4_t a, int32x4_t b) {
  return vuzp1q_s32(a, b);
}
48 
49 // CHECK-LABEL: @test_vuzp1q_s64(
50 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
51 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_s64; keep the names a/b, the directives above match %a/%b.
int64x2_t test_vuzp1q_s64(int64x2_t a, int64x2_t b) {
  return vuzp1q_s64(a, b);
}
55 
56 // CHECK-LABEL: @test_vuzp1_u8(
57 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
58 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp1_u8; keep the names a/b, the directives above match %a/%b.
uint8x8_t test_vuzp1_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp1_u8(a, b);
}
62 
63 // CHECK-LABEL: @test_vuzp1q_u8(
64 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
65 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_u8; keep the names a/b, the directives above match %a/%b.
uint8x16_t test_vuzp1q_u8(uint8x16_t a, uint8x16_t b) {
  return vuzp1q_u8(a, b);
}
69 
70 // CHECK-LABEL: @test_vuzp1_u16(
71 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
72 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp1_u16; keep the names a/b, the directives above match %a/%b.
uint16x4_t test_vuzp1_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp1_u16(a, b);
}
76 
77 // CHECK-LABEL: @test_vuzp1q_u16(
78 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
79 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_u16; keep the names a/b, the directives above match %a/%b.
uint16x8_t test_vuzp1q_u16(uint16x8_t a, uint16x8_t b) {
  return vuzp1q_u16(a, b);
}
83 
84 // CHECK-LABEL: @test_vuzp1_u32(
85 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
86 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper over vuzp1_u32; keep the names a/b, the directives above match %a/%b.
uint32x2_t test_vuzp1_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp1_u32(a, b);
}
90 
91 // CHECK-LABEL: @test_vuzp1q_u32(
92 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
93 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_u32; keep the names a/b, the directives above match %a/%b.
uint32x4_t test_vuzp1q_u32(uint32x4_t a, uint32x4_t b) {
  return vuzp1q_u32(a, b);
}
97 
98 // CHECK-LABEL: @test_vuzp1q_u64(
99 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
100 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_u64; keep the names a/b, the directives above match %a/%b.
uint64x2_t test_vuzp1q_u64(uint64x2_t a, uint64x2_t b) {
  return vuzp1q_u64(a, b);
}
104 
105 // CHECK-LABEL: @test_vuzp1_f32(
106 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
107 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
// Thin wrapper over vuzp1_f32; keep the names a/b, the directives above match %a/%b.
float32x2_t test_vuzp1_f32(float32x2_t a, float32x2_t b) {
  return vuzp1_f32(a, b);
}
111 
112 // CHECK-LABEL: @test_vuzp1q_f32(
113 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
114 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_f32; keep the names a/b, the directives above match %a/%b.
float32x4_t test_vuzp1q_f32(float32x4_t a, float32x4_t b) {
  return vuzp1q_f32(a, b);
}
118 
119 // CHECK-LABEL: @test_vuzp1q_f64(
120 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
121 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_f64; keep the names a/b, the directives above match %a/%b.
float64x2_t test_vuzp1q_f64(float64x2_t a, float64x2_t b) {
  return vuzp1q_f64(a, b);
}
125 
126 // CHECK-LABEL: @test_vuzp1_p8(
127 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
128 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp1_p8; keep the names a/b, the directives above match %a/%b.
poly8x8_t test_vuzp1_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp1_p8(a, b);
}
132 
133 // CHECK-LABEL: @test_vuzp1q_p8(
134 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
135 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_p8; keep the names a/b, the directives above match %a/%b.
poly8x16_t test_vuzp1q_p8(poly8x16_t a, poly8x16_t b) {
  return vuzp1q_p8(a, b);
}
139 
140 // CHECK-LABEL: @test_vuzp1_p16(
141 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
142 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp1_p16; keep the names a/b, the directives above match %a/%b.
poly16x4_t test_vuzp1_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp1_p16(a, b);
}
146 
147 // CHECK-LABEL: @test_vuzp1q_p16(
148 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
149 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp1q_p16; keep the names a/b, the directives above match %a/%b.
poly16x8_t test_vuzp1q_p16(poly16x8_t a, poly16x8_t b) {
  return vuzp1q_p16(a, b);
}
153 
154 // CHECK-LABEL: @test_vuzp2_s8(
155 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
156 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp2_s8; keep the names a/b, the directives above match %a/%b.
int8x8_t test_vuzp2_s8(int8x8_t a, int8x8_t b) {
  return vuzp2_s8(a, b);
}
160 
161 // CHECK-LABEL: @test_vuzp2q_s8(
162 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
163 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_s8; keep the names a/b, the directives above match %a/%b.
int8x16_t test_vuzp2q_s8(int8x16_t a, int8x16_t b) {
  return vuzp2q_s8(a, b);
}
167 
168 // CHECK-LABEL: @test_vuzp2_s16(
169 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
170 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp2_s16; keep the names a/b, the directives above match %a/%b.
int16x4_t test_vuzp2_s16(int16x4_t a, int16x4_t b) {
  return vuzp2_s16(a, b);
}
174 
175 // CHECK-LABEL: @test_vuzp2q_s16(
176 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
177 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_s16; keep the names a/b, the directives above match %a/%b.
int16x8_t test_vuzp2q_s16(int16x8_t a, int16x8_t b) {
  return vuzp2q_s16(a, b);
}
181 
182 // CHECK-LABEL: @test_vuzp2_s32(
183 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
184 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper over vuzp2_s32; keep the names a/b, the directives above match %a/%b.
int32x2_t test_vuzp2_s32(int32x2_t a, int32x2_t b) {
  return vuzp2_s32(a, b);
}
188 
189 // CHECK-LABEL: @test_vuzp2q_s32(
190 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
191 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_s32; keep the names a/b, the directives above match %a/%b.
int32x4_t test_vuzp2q_s32(int32x4_t a, int32x4_t b) {
  return vuzp2q_s32(a, b);
}
195 
196 // CHECK-LABEL: @test_vuzp2q_s64(
197 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
198 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_s64; keep the names a/b, the directives above match %a/%b.
int64x2_t test_vuzp2q_s64(int64x2_t a, int64x2_t b) {
  return vuzp2q_s64(a, b);
}
202 
203 // CHECK-LABEL: @test_vuzp2_u8(
204 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
205 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp2_u8; keep the names a/b, the directives above match %a/%b.
uint8x8_t test_vuzp2_u8(uint8x8_t a, uint8x8_t b) {
  return vuzp2_u8(a, b);
}
209 
210 // CHECK-LABEL: @test_vuzp2q_u8(
211 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
212 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_u8; keep the names a/b, the directives above match %a/%b.
uint8x16_t test_vuzp2q_u8(uint8x16_t a, uint8x16_t b) {
  return vuzp2q_u8(a, b);
}
216 
217 // CHECK-LABEL: @test_vuzp2_u16(
218 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
219 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp2_u16; keep the names a/b, the directives above match %a/%b.
uint16x4_t test_vuzp2_u16(uint16x4_t a, uint16x4_t b) {
  return vuzp2_u16(a, b);
}
223 
224 // CHECK-LABEL: @test_vuzp2q_u16(
225 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
226 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_u16; keep the names a/b, the directives above match %a/%b.
uint16x8_t test_vuzp2q_u16(uint16x8_t a, uint16x8_t b) {
  return vuzp2q_u16(a, b);
}
230 
231 // CHECK-LABEL: @test_vuzp2_u32(
232 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
233 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper over vuzp2_u32; keep the names a/b, the directives above match %a/%b.
uint32x2_t test_vuzp2_u32(uint32x2_t a, uint32x2_t b) {
  return vuzp2_u32(a, b);
}
237 
238 // CHECK-LABEL: @test_vuzp2q_u32(
239 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
240 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_u32; keep the names a/b, the directives above match %a/%b.
uint32x4_t test_vuzp2q_u32(uint32x4_t a, uint32x4_t b) {
  return vuzp2q_u32(a, b);
}
244 
245 // CHECK-LABEL: @test_vuzp2q_u64(
246 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
247 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_u64; keep the names a/b, the directives above match %a/%b.
uint64x2_t test_vuzp2q_u64(uint64x2_t a, uint64x2_t b) {
  return vuzp2q_u64(a, b);
}
251 
252 // CHECK-LABEL: @test_vuzp2_f32(
253 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
254 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
// Thin wrapper over vuzp2_f32; keep the names a/b, the directives above match %a/%b.
float32x2_t test_vuzp2_f32(float32x2_t a, float32x2_t b) {
  return vuzp2_f32(a, b);
}
258 
259 // CHECK-LABEL: @test_vuzp2q_f32(
260 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
261 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_f32; keep the names a/b, the directives above match %a/%b.
float32x4_t test_vuzp2q_f32(float32x4_t a, float32x4_t b) {
  return vuzp2q_f32(a, b);
}
265 
266 // CHECK-LABEL: @test_vuzp2q_f64(
267 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
268 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_f64; keep the names a/b, the directives above match %a/%b.
float64x2_t test_vuzp2q_f64(float64x2_t a, float64x2_t b) {
  return vuzp2q_f64(a, b);
}
272 
273 // CHECK-LABEL: @test_vuzp2_p8(
274 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
275 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp2_p8; keep the names a/b, the directives above match %a/%b.
poly8x8_t test_vuzp2_p8(poly8x8_t a, poly8x8_t b) {
  return vuzp2_p8(a, b);
}
279 
280 // CHECK-LABEL: @test_vuzp2q_p8(
281 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
282 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_p8; keep the names a/b, the directives above match %a/%b.
poly8x16_t test_vuzp2q_p8(poly8x16_t a, poly8x16_t b) {
  return vuzp2q_p8(a, b);
}
286 
287 // CHECK-LABEL: @test_vuzp2_p16(
288 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
289 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp2_p16; keep the names a/b, the directives above match %a/%b.
poly16x4_t test_vuzp2_p16(poly16x4_t a, poly16x4_t b) {
  return vuzp2_p16(a, b);
}
293 
294 // CHECK-LABEL: @test_vuzp2q_p16(
295 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
296 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vuzp2q_p16; keep the names a/b, the directives above match %a/%b.
poly16x8_t test_vuzp2q_p16(poly16x8_t a, poly16x8_t b) {
  return vuzp2q_p16(a, b);
}
300 
301 // CHECK-LABEL: @test_vzip1_s8(
302 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
303 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip1_s8; keep the names a/b, the directives above match %a/%b.
int8x8_t test_vzip1_s8(int8x8_t a, int8x8_t b) {
  return vzip1_s8(a, b);
}
307 
308 // CHECK-LABEL: @test_vzip1q_s8(
309 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
310 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_s8; keep the names a/b, the directives above match %a/%b.
int8x16_t test_vzip1q_s8(int8x16_t a, int8x16_t b) {
  return vzip1q_s8(a, b);
}
314 
315 // CHECK-LABEL: @test_vzip1_s16(
316 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
317 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip1_s16; keep the names a/b, the directives above match %a/%b.
int16x4_t test_vzip1_s16(int16x4_t a, int16x4_t b) {
  return vzip1_s16(a, b);
}
321 
322 // CHECK-LABEL: @test_vzip1q_s16(
323 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
324 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_s16; keep the names a/b, the directives above match %a/%b.
int16x8_t test_vzip1q_s16(int16x8_t a, int16x8_t b) {
  return vzip1q_s16(a, b);
}
328 
329 // CHECK-LABEL: @test_vzip1_s32(
330 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
331 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper over vzip1_s32; keep the names a/b, the directives above match %a/%b.
int32x2_t test_vzip1_s32(int32x2_t a, int32x2_t b) {
  return vzip1_s32(a, b);
}
335 
336 // CHECK-LABEL: @test_vzip1q_s32(
337 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
338 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_s32; keep the names a/b, the directives above match %a/%b.
int32x4_t test_vzip1q_s32(int32x4_t a, int32x4_t b) {
  return vzip1q_s32(a, b);
}
342 
343 // CHECK-LABEL: @test_vzip1q_s64(
344 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
345 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_s64; keep the names a/b, the directives above match %a/%b.
int64x2_t test_vzip1q_s64(int64x2_t a, int64x2_t b) {
  return vzip1q_s64(a, b);
}
349 
350 // CHECK-LABEL: @test_vzip1_u8(
351 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
352 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip1_u8; keep the names a/b, the directives above match %a/%b.
uint8x8_t test_vzip1_u8(uint8x8_t a, uint8x8_t b) {
  return vzip1_u8(a, b);
}
356 
357 // CHECK-LABEL: @test_vzip1q_u8(
358 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
359 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_u8; keep the names a/b, the directives above match %a/%b.
uint8x16_t test_vzip1q_u8(uint8x16_t a, uint8x16_t b) {
  return vzip1q_u8(a, b);
}
363 
364 // CHECK-LABEL: @test_vzip1_u16(
365 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
366 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip1_u16; keep the names a/b, the directives above match %a/%b.
uint16x4_t test_vzip1_u16(uint16x4_t a, uint16x4_t b) {
  return vzip1_u16(a, b);
}
370 
371 // CHECK-LABEL: @test_vzip1q_u16(
372 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
373 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_u16; keep the names a/b, the directives above match %a/%b.
uint16x8_t test_vzip1q_u16(uint16x8_t a, uint16x8_t b) {
  return vzip1q_u16(a, b);
}
377 
378 // CHECK-LABEL: @test_vzip1_u32(
379 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
380 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper over vzip1_u32; keep the names a/b, the directives above match %a/%b.
uint32x2_t test_vzip1_u32(uint32x2_t a, uint32x2_t b) {
  return vzip1_u32(a, b);
}
384 
385 // CHECK-LABEL: @test_vzip1q_u32(
386 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
387 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_u32; keep the names a/b, the directives above match %a/%b.
uint32x4_t test_vzip1q_u32(uint32x4_t a, uint32x4_t b) {
  return vzip1q_u32(a, b);
}
391 
392 // CHECK-LABEL: @test_vzip1q_u64(
393 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
394 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_u64; keep the names a/b, the directives above match %a/%b.
uint64x2_t test_vzip1q_u64(uint64x2_t a, uint64x2_t b) {
  return vzip1q_u64(a, b);
}
398 
399 // CHECK-LABEL: @test_vzip1_f32(
400 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
401 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
// Thin wrapper over vzip1_f32; keep the names a/b, the directives above match %a/%b.
float32x2_t test_vzip1_f32(float32x2_t a, float32x2_t b) {
  return vzip1_f32(a, b);
}
405 
406 // CHECK-LABEL: @test_vzip1q_f32(
407 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
408 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_f32; keep the names a/b, the directives above match %a/%b.
float32x4_t test_vzip1q_f32(float32x4_t a, float32x4_t b) {
  return vzip1q_f32(a, b);
}
412 
413 // CHECK-LABEL: @test_vzip1q_f64(
414 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
415 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_f64; keep the names a/b, the directives above match %a/%b.
float64x2_t test_vzip1q_f64(float64x2_t a, float64x2_t b) {
  return vzip1q_f64(a, b);
}
419 
420 // CHECK-LABEL: @test_vzip1_p8(
421 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
422 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip1_p8; keep the names a/b, the directives above match %a/%b.
poly8x8_t test_vzip1_p8(poly8x8_t a, poly8x8_t b) {
  return vzip1_p8(a, b);
}
426 
427 // CHECK-LABEL: @test_vzip1q_p8(
428 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
429 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_p8; keep the names a/b, the directives above match %a/%b.
poly8x16_t test_vzip1q_p8(poly8x16_t a, poly8x16_t b) {
  return vzip1q_p8(a, b);
}
433 
434 // CHECK-LABEL: @test_vzip1_p16(
435 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
436 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip1_p16; keep the names a/b, the directives above match %a/%b.
poly16x4_t test_vzip1_p16(poly16x4_t a, poly16x4_t b) {
  return vzip1_p16(a, b);
}
440 
441 // CHECK-LABEL: @test_vzip1q_p16(
442 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
443 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip1q_p16; keep the names a/b, the directives above match %a/%b.
poly16x8_t test_vzip1q_p16(poly16x8_t a, poly16x8_t b) {
  return vzip1q_p16(a, b);
}
447 
448 // CHECK-LABEL: @test_vzip2_s8(
449 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
450 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip2_s8; keep the names a/b, the directives above match %a/%b.
int8x8_t test_vzip2_s8(int8x8_t a, int8x8_t b) {
  return vzip2_s8(a, b);
}
454 
455 // CHECK-LABEL: @test_vzip2q_s8(
456 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
457 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_s8; keep the names a/b, the directives above match %a/%b.
int8x16_t test_vzip2q_s8(int8x16_t a, int8x16_t b) {
  return vzip2q_s8(a, b);
}
461 
462 // CHECK-LABEL: @test_vzip2_s16(
463 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
464 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip2_s16; keep the names a/b, the directives above match %a/%b.
int16x4_t test_vzip2_s16(int16x4_t a, int16x4_t b) {
  return vzip2_s16(a, b);
}
468 
469 // CHECK-LABEL: @test_vzip2q_s16(
470 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
471 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_s16; keep the names a/b, the directives above match %a/%b.
int16x8_t test_vzip2q_s16(int16x8_t a, int16x8_t b) {
  return vzip2q_s16(a, b);
}
475 
476 // CHECK-LABEL: @test_vzip2_s32(
477 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
478 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper over vzip2_s32; keep the names a/b, the directives above match %a/%b.
int32x2_t test_vzip2_s32(int32x2_t a, int32x2_t b) {
  return vzip2_s32(a, b);
}
482 
483 // CHECK-LABEL: @test_vzip2q_s32(
484 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
485 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_s32; keep the names a/b, the directives above match %a/%b.
int32x4_t test_vzip2q_s32(int32x4_t a, int32x4_t b) {
  return vzip2q_s32(a, b);
}
489 
490 // CHECK-LABEL: @test_vzip2q_s64(
491 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
492 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_s64; keep the names a/b, the directives above match %a/%b.
int64x2_t test_vzip2q_s64(int64x2_t a, int64x2_t b) {
  return vzip2q_s64(a, b);
}
496 
497 // CHECK-LABEL: @test_vzip2_u8(
498 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
499 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip2_u8; keep the names a/b, the directives above match %a/%b.
uint8x8_t test_vzip2_u8(uint8x8_t a, uint8x8_t b) {
  return vzip2_u8(a, b);
}
503 
504 // CHECK-LABEL: @test_vzip2q_u8(
505 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
506 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_u8; keep the names a/b, the directives above match %a/%b.
uint8x16_t test_vzip2q_u8(uint8x16_t a, uint8x16_t b) {
  return vzip2q_u8(a, b);
}
510 
511 // CHECK-LABEL: @test_vzip2_u16(
512 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
513 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip2_u16; keep the names a/b, the directives above match %a/%b.
uint16x4_t test_vzip2_u16(uint16x4_t a, uint16x4_t b) {
  return vzip2_u16(a, b);
}
517 
518 // CHECK-LABEL: @test_vzip2q_u16(
519 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
520 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_u16; keep the names a/b, the directives above match %a/%b.
uint16x8_t test_vzip2q_u16(uint16x8_t a, uint16x8_t b) {
  return vzip2q_u16(a, b);
}
524 
525 // CHECK-LABEL: @test_vzip2_u32(
526 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
527 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper over vzip2_u32; keep the names a/b, the directives above match %a/%b.
uint32x2_t test_vzip2_u32(uint32x2_t a, uint32x2_t b) {
  return vzip2_u32(a, b);
}
531 
532 // CHECK-LABEL: @test_vzip2q_u32(
533 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
534 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_u32; keep the names a/b, the directives above match %a/%b.
uint32x4_t test_vzip2q_u32(uint32x4_t a, uint32x4_t b) {
  return vzip2q_u32(a, b);
}
538 
539 // CHECK-LABEL: @test_vzip2q_u64(
540 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
541 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_u64; keep the names a/b, the directives above match %a/%b.
uint64x2_t test_vzip2q_u64(uint64x2_t a, uint64x2_t b) {
  return vzip2q_u64(a, b);
}
545 
546 // CHECK-LABEL: @test_vzip2_f32(
547 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
548 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
// Thin wrapper over vzip2_f32; keep the names a/b, the directives above match %a/%b.
float32x2_t test_vzip2_f32(float32x2_t a, float32x2_t b) {
  return vzip2_f32(a, b);
}
552 
553 // CHECK-LABEL: @test_vzip2q_f32(
554 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
555 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_f32; keep the names a/b, the directives above match %a/%b.
float32x4_t test_vzip2q_f32(float32x4_t a, float32x4_t b) {
  return vzip2q_f32(a, b);
}
559 
560 // CHECK-LABEL: @test_vzip2q_f64(
561 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
562 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_f64; keep the names a/b, the directives above match %a/%b.
float64x2_t test_vzip2q_f64(float64x2_t a, float64x2_t b) {
  return vzip2q_f64(a, b);
}
566 
567 // CHECK-LABEL: @test_vzip2_p8(
568 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
569 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip2_p8; keep the names a/b, the directives above match %a/%b.
poly8x8_t test_vzip2_p8(poly8x8_t a, poly8x8_t b) {
  return vzip2_p8(a, b);
}
573 
574 // CHECK-LABEL: @test_vzip2q_p8(
575 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
576 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_p8; keep the names a/b, the directives above match %a/%b.
poly8x16_t test_vzip2q_p8(poly8x16_t a, poly8x16_t b) {
  return vzip2q_p8(a, b);
}
580 
581 // CHECK-LABEL: @test_vzip2_p16(
582 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
583 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip2_p16; keep the names a/b, the directives above match %a/%b.
poly16x4_t test_vzip2_p16(poly16x4_t a, poly16x4_t b) {
  return vzip2_p16(a, b);
}
587 
588 // CHECK-LABEL: @test_vzip2q_p16(
589 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
590 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vzip2q_p16; keep the names a/b, the directives above match %a/%b.
poly16x8_t test_vzip2q_p16(poly16x8_t a, poly16x8_t b) {
  return vzip2q_p16(a, b);
}
594 
595 // CHECK-LABEL: @test_vtrn1_s8(
596 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
597 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
// Thin wrapper over vtrn1_s8; keep the names a/b, the directives above match %a/%b.
int8x8_t test_vtrn1_s8(int8x8_t a, int8x8_t b) {
  return vtrn1_s8(a, b);
}
601 
602 // CHECK-LABEL: @test_vtrn1q_s8(
603 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
604 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
// Thin wrapper over vtrn1q_s8; keep the names a/b, the directives above match %a/%b.
int8x16_t test_vtrn1q_s8(int8x16_t a, int8x16_t b) {
  return vtrn1q_s8(a, b);
}
608 
609 // CHECK-LABEL: @test_vtrn1_s16(
610 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
611 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
// Thin wrapper over vtrn1_s16; keep the names a/b, the directives above match %a/%b.
int16x4_t test_vtrn1_s16(int16x4_t a, int16x4_t b) {
  return vtrn1_s16(a, b);
}
615 
616 // CHECK-LABEL: @test_vtrn1q_s16(
617 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
618 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
// Thin wrapper over vtrn1q_s16; keep the names a/b, the directives above match %a/%b.
int16x8_t test_vtrn1q_s16(int16x8_t a, int16x8_t b) {
  return vtrn1q_s16(a, b);
}
622 
623 // CHECK-LABEL: @test_vtrn1_s32(
624 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
625 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
// Thin wrapper over vtrn1_s32; keep the names a/b, the directives above match %a/%b.
int32x2_t test_vtrn1_s32(int32x2_t a, int32x2_t b) {
  return vtrn1_s32(a, b);
}
629 
630 // CHECK-LABEL: @test_vtrn1q_s32(
631 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
632 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vtrn1q_s32(int32x4_t a,int32x4_t b)633 int32x4_t test_vtrn1q_s32(int32x4_t a, int32x4_t b) {
634   return vtrn1q_s32(a, b);
635 }
636 
637 // CHECK-LABEL: @test_vtrn1q_s64(
638 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
639 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vtrn1q_s64(int64x2_t a,int64x2_t b)640 int64x2_t test_vtrn1q_s64(int64x2_t a, int64x2_t b) {
641   return vtrn1q_s64(a, b);
642 }
643 
644 // CHECK-LABEL: @test_vtrn1_u8(
645 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
646 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn1_u8(uint8x8_t a,uint8x8_t b)647 uint8x8_t test_vtrn1_u8(uint8x8_t a, uint8x8_t b) {
648   return vtrn1_u8(a, b);
649 }
650 
651 // CHECK-LABEL: @test_vtrn1q_u8(
652 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
653 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn1q_u8(uint8x16_t a,uint8x16_t b)654 uint8x16_t test_vtrn1q_u8(uint8x16_t a, uint8x16_t b) {
655   return vtrn1q_u8(a, b);
656 }
657 
658 // CHECK-LABEL: @test_vtrn1_u16(
659 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
660 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn1_u16(uint16x4_t a,uint16x4_t b)661 uint16x4_t test_vtrn1_u16(uint16x4_t a, uint16x4_t b) {
662   return vtrn1_u16(a, b);
663 }
664 
665 // CHECK-LABEL: @test_vtrn1q_u16(
666 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
667 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn1q_u16(uint16x8_t a,uint16x8_t b)668 uint16x8_t test_vtrn1q_u16(uint16x8_t a, uint16x8_t b) {
669   return vtrn1q_u16(a, b);
670 }
671 
672 // CHECK-LABEL: @test_vtrn1_u32(
673 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
674 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
test_vtrn1_u32(uint32x2_t a,uint32x2_t b)675 uint32x2_t test_vtrn1_u32(uint32x2_t a, uint32x2_t b) {
676   return vtrn1_u32(a, b);
677 }
678 
679 // CHECK-LABEL: @test_vtrn1q_u32(
680 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
681 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vtrn1q_u32(uint32x4_t a,uint32x4_t b)682 uint32x4_t test_vtrn1q_u32(uint32x4_t a, uint32x4_t b) {
683   return vtrn1q_u32(a, b);
684 }
685 
686 // CHECK-LABEL: @test_vtrn1q_u64(
687 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
688 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vtrn1q_u64(uint64x2_t a,uint64x2_t b)689 uint64x2_t test_vtrn1q_u64(uint64x2_t a, uint64x2_t b) {
690   return vtrn1q_u64(a, b);
691 }
692 
693 // CHECK-LABEL: @test_vtrn1_f32(
694 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
695 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
test_vtrn1_f32(float32x2_t a,float32x2_t b)696 float32x2_t test_vtrn1_f32(float32x2_t a, float32x2_t b) {
697   return vtrn1_f32(a, b);
698 }
699 
700 // CHECK-LABEL: @test_vtrn1q_f32(
701 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
702 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
test_vtrn1q_f32(float32x4_t a,float32x4_t b)703 float32x4_t test_vtrn1q_f32(float32x4_t a, float32x4_t b) {
704   return vtrn1q_f32(a, b);
705 }
706 
707 // CHECK-LABEL: @test_vtrn1q_f64(
708 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
709 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
test_vtrn1q_f64(float64x2_t a,float64x2_t b)710 float64x2_t test_vtrn1q_f64(float64x2_t a, float64x2_t b) {
711   return vtrn1q_f64(a, b);
712 }
713 
714 // CHECK-LABEL: @test_vtrn1_p8(
715 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
716 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn1_p8(poly8x8_t a,poly8x8_t b)717 poly8x8_t test_vtrn1_p8(poly8x8_t a, poly8x8_t b) {
718   return vtrn1_p8(a, b);
719 }
720 
721 // CHECK-LABEL: @test_vtrn1q_p8(
722 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
723 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn1q_p8(poly8x16_t a,poly8x16_t b)724 poly8x16_t test_vtrn1q_p8(poly8x16_t a, poly8x16_t b) {
725   return vtrn1q_p8(a, b);
726 }
727 
728 // CHECK-LABEL: @test_vtrn1_p16(
729 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
730 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn1_p16(poly16x4_t a,poly16x4_t b)731 poly16x4_t test_vtrn1_p16(poly16x4_t a, poly16x4_t b) {
732   return vtrn1_p16(a, b);
733 }
734 
735 // CHECK-LABEL: @test_vtrn1q_p16(
736 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
737 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn1q_p16(poly16x8_t a,poly16x8_t b)738 poly16x8_t test_vtrn1q_p16(poly16x8_t a, poly16x8_t b) {
739   return vtrn1q_p16(a, b);
740 }
741 
742 // CHECK-LABEL: @test_vtrn2_s8(
743 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
744 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn2_s8(int8x8_t a,int8x8_t b)745 int8x8_t test_vtrn2_s8(int8x8_t a, int8x8_t b) {
746   return vtrn2_s8(a, b);
747 }
748 
749 // CHECK-LABEL: @test_vtrn2q_s8(
750 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
751 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn2q_s8(int8x16_t a,int8x16_t b)752 int8x16_t test_vtrn2q_s8(int8x16_t a, int8x16_t b) {
753   return vtrn2q_s8(a, b);
754 }
755 
756 // CHECK-LABEL: @test_vtrn2_s16(
757 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
758 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn2_s16(int16x4_t a,int16x4_t b)759 int16x4_t test_vtrn2_s16(int16x4_t a, int16x4_t b) {
760   return vtrn2_s16(a, b);
761 }
762 
763 // CHECK-LABEL: @test_vtrn2q_s16(
764 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
765 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn2q_s16(int16x8_t a,int16x8_t b)766 int16x8_t test_vtrn2q_s16(int16x8_t a, int16x8_t b) {
767   return vtrn2q_s16(a, b);
768 }
769 
770 // CHECK-LABEL: @test_vtrn2_s32(
771 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
772 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
test_vtrn2_s32(int32x2_t a,int32x2_t b)773 int32x2_t test_vtrn2_s32(int32x2_t a, int32x2_t b) {
774   return vtrn2_s32(a, b);
775 }
776 
777 // CHECK-LABEL: @test_vtrn2q_s32(
778 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
779 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vtrn2q_s32(int32x4_t a,int32x4_t b)780 int32x4_t test_vtrn2q_s32(int32x4_t a, int32x4_t b) {
781   return vtrn2q_s32(a, b);
782 }
783 
784 // CHECK-LABEL: @test_vtrn2q_s64(
785 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
786 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vtrn2q_s64(int64x2_t a,int64x2_t b)787 int64x2_t test_vtrn2q_s64(int64x2_t a, int64x2_t b) {
788   return vtrn2q_s64(a, b);
789 }
790 
791 // CHECK-LABEL: @test_vtrn2_u8(
792 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
793 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn2_u8(uint8x8_t a,uint8x8_t b)794 uint8x8_t test_vtrn2_u8(uint8x8_t a, uint8x8_t b) {
795   return vtrn2_u8(a, b);
796 }
797 
798 // CHECK-LABEL: @test_vtrn2q_u8(
799 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
800 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn2q_u8(uint8x16_t a,uint8x16_t b)801 uint8x16_t test_vtrn2q_u8(uint8x16_t a, uint8x16_t b) {
802   return vtrn2q_u8(a, b);
803 }
804 
805 // CHECK-LABEL: @test_vtrn2_u16(
806 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
807 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn2_u16(uint16x4_t a,uint16x4_t b)808 uint16x4_t test_vtrn2_u16(uint16x4_t a, uint16x4_t b) {
809   return vtrn2_u16(a, b);
810 }
811 
812 // CHECK-LABEL: @test_vtrn2q_u16(
813 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
814 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn2q_u16(uint16x8_t a,uint16x8_t b)815 uint16x8_t test_vtrn2q_u16(uint16x8_t a, uint16x8_t b) {
816   return vtrn2q_u16(a, b);
817 }
818 
819 // CHECK-LABEL: @test_vtrn2_u32(
820 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
821 // CHECK:   ret <2 x i32> [[SHUFFLE_I]]
test_vtrn2_u32(uint32x2_t a,uint32x2_t b)822 uint32x2_t test_vtrn2_u32(uint32x2_t a, uint32x2_t b) {
823   return vtrn2_u32(a, b);
824 }
825 
826 // CHECK-LABEL: @test_vtrn2q_u32(
827 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
828 // CHECK:   ret <4 x i32> [[SHUFFLE_I]]
test_vtrn2q_u32(uint32x4_t a,uint32x4_t b)829 uint32x4_t test_vtrn2q_u32(uint32x4_t a, uint32x4_t b) {
830   return vtrn2q_u32(a, b);
831 }
832 
833 // CHECK-LABEL: @test_vtrn2q_u64(
834 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
835 // CHECK:   ret <2 x i64> [[SHUFFLE_I]]
test_vtrn2q_u64(uint64x2_t a,uint64x2_t b)836 uint64x2_t test_vtrn2q_u64(uint64x2_t a, uint64x2_t b) {
837   return vtrn2q_u64(a, b);
838 }
839 
840 // CHECK-LABEL: @test_vtrn2_f32(
841 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
842 // CHECK:   ret <2 x float> [[SHUFFLE_I]]
test_vtrn2_f32(float32x2_t a,float32x2_t b)843 float32x2_t test_vtrn2_f32(float32x2_t a, float32x2_t b) {
844   return vtrn2_f32(a, b);
845 }
846 
847 // CHECK-LABEL: @test_vtrn2q_f32(
848 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
849 // CHECK:   ret <4 x float> [[SHUFFLE_I]]
test_vtrn2q_f32(float32x4_t a,float32x4_t b)850 float32x4_t test_vtrn2q_f32(float32x4_t a, float32x4_t b) {
851   return vtrn2q_f32(a, b);
852 }
853 
854 // CHECK-LABEL: @test_vtrn2q_f64(
855 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
856 // CHECK:   ret <2 x double> [[SHUFFLE_I]]
test_vtrn2q_f64(float64x2_t a,float64x2_t b)857 float64x2_t test_vtrn2q_f64(float64x2_t a, float64x2_t b) {
858   return vtrn2q_f64(a, b);
859 }
860 
861 // CHECK-LABEL: @test_vtrn2_p8(
862 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
863 // CHECK:   ret <8 x i8> [[SHUFFLE_I]]
test_vtrn2_p8(poly8x8_t a,poly8x8_t b)864 poly8x8_t test_vtrn2_p8(poly8x8_t a, poly8x8_t b) {
865   return vtrn2_p8(a, b);
866 }
867 
868 // CHECK-LABEL: @test_vtrn2q_p8(
869 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
870 // CHECK:   ret <16 x i8> [[SHUFFLE_I]]
test_vtrn2q_p8(poly8x16_t a,poly8x16_t b)871 poly8x16_t test_vtrn2q_p8(poly8x16_t a, poly8x16_t b) {
872   return vtrn2q_p8(a, b);
873 }
874 
875 // CHECK-LABEL: @test_vtrn2_p16(
876 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
877 // CHECK:   ret <4 x i16> [[SHUFFLE_I]]
test_vtrn2_p16(poly16x4_t a,poly16x4_t b)878 poly16x4_t test_vtrn2_p16(poly16x4_t a, poly16x4_t b) {
879   return vtrn2_p16(a, b);
880 }
881 
882 // CHECK-LABEL: @test_vtrn2q_p16(
883 // CHECK:   [[SHUFFLE_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
884 // CHECK:   ret <8 x i16> [[SHUFFLE_I]]
test_vtrn2q_p16(poly16x8_t a,poly16x8_t b)885 poly16x8_t test_vtrn2q_p16(poly16x8_t a, poly16x8_t b) {
886   return vtrn2q_p16(a, b);
887 }
888 
889 // CHECK-LABEL: @test_vuzp_s8(
890 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
891 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
892 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
893 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
894 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
895 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
896 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
897 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
898 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
899 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
900 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
901 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
902 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
903 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
904 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
test_vuzp_s8(int8x8_t a,int8x8_t b)905 int8x8x2_t test_vuzp_s8(int8x8_t a, int8x8_t b) {
906   return vuzp_s8(a, b);
907 }
908 
909 // CHECK-LABEL: @test_vuzp_s16(
910 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
911 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
912 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
913 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
914 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
915 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
916 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
917 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
918 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
919 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
920 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
921 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
922 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
923 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
924 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
925 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
926 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
test_vuzp_s16(int16x4_t a,int16x4_t b)927 int16x4x2_t test_vuzp_s16(int16x4_t a, int16x4_t b) {
928   return vuzp_s16(a, b);
929 }
930 
931 // CHECK-LABEL: @test_vuzp_s32(
932 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
933 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
934 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
935 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
936 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
937 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
938 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
939 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
940 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
941 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
942 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]]
943 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
944 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
945 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
946 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
947 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
948 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
test_vuzp_s32(int32x2_t a,int32x2_t b)949 int32x2x2_t test_vuzp_s32(int32x2_t a, int32x2_t b) {
950   return vuzp_s32(a, b);
951 }
952 
953 // CHECK-LABEL: @test_vuzp_u8(
954 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
955 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
956 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
957 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
958 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
959 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
960 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
961 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
962 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
963 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
964 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
965 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
966 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
967 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
968 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
test_vuzp_u8(uint8x8_t a,uint8x8_t b)969 uint8x8x2_t test_vuzp_u8(uint8x8_t a, uint8x8_t b) {
970   return vuzp_u8(a, b);
971 }
972 
973 // CHECK-LABEL: @test_vuzp_u16(
974 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
975 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
976 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
977 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
978 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
979 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
980 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
981 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
982 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
983 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
984 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
985 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
986 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
987 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
988 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
989 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
990 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
test_vuzp_u16(uint16x4_t a,uint16x4_t b)991 uint16x4x2_t test_vuzp_u16(uint16x4_t a, uint16x4_t b) {
992   return vuzp_u16(a, b);
993 }
994 
995 // CHECK-LABEL: @test_vuzp_u32(
996 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
997 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
998 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
999 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1000 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1001 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1002 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1003 // CHECK:   store <2 x i32> [[VUZP_I]], <2 x i32>* [[TMP3]]
1004 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1005 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1006 // CHECK:   store <2 x i32> [[VUZP1_I]], <2 x i32>* [[TMP4]]
1007 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1008 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1009 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1010 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1011 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1012 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
test_vuzp_u32(uint32x2_t a,uint32x2_t b)1013 uint32x2x2_t test_vuzp_u32(uint32x2_t a, uint32x2_t b) {
1014   return vuzp_u32(a, b);
1015 }
1016 
1017 // CHECK-LABEL: @test_vuzp_f32(
1018 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1019 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1020 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1021 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1022 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1023 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1024 // CHECK:   [[VUZP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1025 // CHECK:   store <2 x float> [[VUZP_I]], <2 x float>* [[TMP3]]
1026 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1027 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1028 // CHECK:   store <2 x float> [[VUZP1_I]], <2 x float>* [[TMP4]]
1029 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1030 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1031 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1032 // CHECK:   store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
1033 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1034 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
test_vuzp_f32(float32x2_t a,float32x2_t b)1035 float32x2x2_t test_vuzp_f32(float32x2_t a, float32x2_t b) {
1036   return vuzp_f32(a, b);
1037 }
1038 
1039 // CHECK-LABEL: @test_vuzp_p8(
1040 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1041 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1042 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1043 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1044 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1045 // CHECK:   store <8 x i8> [[VUZP_I]], <8 x i8>* [[TMP1]]
1046 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1047 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1048 // CHECK:   store <8 x i8> [[VUZP1_I]], <8 x i8>* [[TMP2]]
1049 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1050 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1051 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1052 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1053 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1054 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
test_vuzp_p8(poly8x8_t a,poly8x8_t b)1055 poly8x8x2_t test_vuzp_p8(poly8x8_t a, poly8x8_t b) {
1056   return vuzp_p8(a, b);
1057 }
1058 
1059 // CHECK-LABEL: @test_vuzp_p16(
1060 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1061 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1062 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
1063 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1064 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1065 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1066 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1067 // CHECK:   store <4 x i16> [[VUZP_I]], <4 x i16>* [[TMP3]]
1068 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1069 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1070 // CHECK:   store <4 x i16> [[VUZP1_I]], <4 x i16>* [[TMP4]]
1071 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
1072 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
1073 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1074 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1075 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
1076 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
test_vuzp_p16(poly16x4_t a,poly16x4_t b)1077 poly16x4x2_t test_vuzp_p16(poly16x4_t a, poly16x4_t b) {
1078   return vuzp_p16(a, b);
1079 }
1080 
1081 // CHECK-LABEL: @test_vuzpq_s8(
1082 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1083 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1084 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
1085 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1086 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1087 // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1088 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1089 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1090 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1091 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
1092 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
1093 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1094 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1095 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
1096 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
test_vuzpq_s8(int8x16_t a,int8x16_t b)1097 int8x16x2_t test_vuzpq_s8(int8x16_t a, int8x16_t b) {
1098   return vuzpq_s8(a, b);
1099 }
1100 
1101 // CHECK-LABEL: @test_vuzpq_s16(
1102 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1103 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1104 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
1105 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1106 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1107 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1108 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1109 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1110 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1111 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1112 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
1113 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
1114 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
1115 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1116 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1117 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
1118 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
test_vuzpq_s16(int16x8_t a,int16x8_t b)1119 int16x8x2_t test_vuzpq_s16(int16x8_t a, int16x8_t b) {
1120   return vuzpq_s16(a, b);
1121 }
1122 
1123 // CHECK-LABEL: @test_vuzpq_s32(
1124 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1125 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1126 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
1127 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1128 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1129 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1130 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1131 // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
1132 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1133 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1134 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]]
1135 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
1136 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
1137 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1138 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1139 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
1140 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
// Test wrapper: returns vuzpq_s32(a, b) directly.
// The FileCheck expectations preceding this function look for two <4 x i32>
// shufflevectors of %a and %b with masks <0, 2, 4, 6> and <1, 3, 5, 7>,
// stored in that order into the returned int32x4x2_t.
test_vuzpq_s32(int32x4_t a,int32x4_t b)1141 int32x4x2_t test_vuzpq_s32(int32x4_t a, int32x4_t b) {
1142   return vuzpq_s32(a, b);
1143 }
1144 
1145 // CHECK-LABEL: @test_vuzpq_u8(
1146 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1147 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1148 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
1149 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1150 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1151 // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1152 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1153 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1154 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1155 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
1156 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
1157 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1158 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1159 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
1160 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Test wrapper: returns vuzpq_u8(a, b) directly.
// The FileCheck expectations preceding this function look for two <16 x i8>
// shufflevectors of %a and %b: the even indices 0..30 first, then the odd
// indices 1..31, stored in that order into the returned uint8x16x2_t.
test_vuzpq_u8(uint8x16_t a,uint8x16_t b)1161 uint8x16x2_t test_vuzpq_u8(uint8x16_t a, uint8x16_t b) {
1162   return vuzpq_u8(a, b);
1163 }
1164 
1165 // CHECK-LABEL: @test_vuzpq_u16(
1166 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1167 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1168 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
1169 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1170 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1171 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1172 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1173 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1174 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1175 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1176 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
1177 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
1178 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
1179 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1180 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1181 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
1182 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
// Test wrapper: returns vuzpq_u16(a, b) directly.
// The FileCheck expectations preceding this function look for two <8 x i16>
// shufflevectors of %a and %b: the even indices 0..14 first, then the odd
// indices 1..15, stored in that order into the returned uint16x8x2_t.
test_vuzpq_u16(uint16x8_t a,uint16x8_t b)1183 uint16x8x2_t test_vuzpq_u16(uint16x8_t a, uint16x8_t b) {
1184   return vuzpq_u16(a, b);
1185 }
1186 
1187 // CHECK-LABEL: @test_vuzpq_u32(
1188 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1189 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1190 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
1191 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1192 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1193 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1194 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1195 // CHECK:   store <4 x i32> [[VUZP_I]], <4 x i32>* [[TMP3]]
1196 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1197 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1198 // CHECK:   store <4 x i32> [[VUZP1_I]], <4 x i32>* [[TMP4]]
1199 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
1200 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
1201 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1202 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1203 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
1204 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
// Test wrapper: returns vuzpq_u32(a, b) directly.
// The FileCheck expectations preceding this function look for two <4 x i32>
// shufflevectors of %a and %b with masks <0, 2, 4, 6> and <1, 3, 5, 7>,
// stored in that order into the returned uint32x4x2_t.
test_vuzpq_u32(uint32x4_t a,uint32x4_t b)1205 uint32x4x2_t test_vuzpq_u32(uint32x4_t a, uint32x4_t b) {
1206   return vuzpq_u32(a, b);
1207 }
1208 
1209 // CHECK-LABEL: @test_vuzpq_f32(
1210 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1211 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1212 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
1213 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1214 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1215 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
1216 // CHECK:   [[VUZP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1217 // CHECK:   store <4 x float> [[VUZP_I]], <4 x float>* [[TMP3]]
1218 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
1219 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
1220 // CHECK:   store <4 x float> [[VUZP1_I]], <4 x float>* [[TMP4]]
1221 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
1222 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
1223 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1224 // CHECK:   store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
1225 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
1226 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
// Test wrapper: returns vuzpq_f32(a, b) directly.
// The FileCheck expectations preceding this function look for two <4 x float>
// shufflevectors of %a and %b with masks <0, 2, 4, 6> and <1, 3, 5, 7>,
// stored in that order into the returned float32x4x2_t.
test_vuzpq_f32(float32x4_t a,float32x4_t b)1227 float32x4x2_t test_vuzpq_f32(float32x4_t a, float32x4_t b) {
1228   return vuzpq_f32(a, b);
1229 }
1230 
1231 // CHECK-LABEL: @test_vuzpq_p8(
1232 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1233 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1234 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
1235 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1236 // CHECK:   [[VUZP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
1237 // CHECK:   store <16 x i8> [[VUZP_I]], <16 x i8>* [[TMP1]]
1238 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1239 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
1240 // CHECK:   store <16 x i8> [[VUZP1_I]], <16 x i8>* [[TMP2]]
1241 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
1242 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
1243 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1244 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1245 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
1246 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Test wrapper: returns vuzpq_p8(a, b) directly.
// The FileCheck expectations preceding this function look for two <16 x i8>
// shufflevectors of %a and %b: the even indices 0..30 first, then the odd
// indices 1..31, stored in that order into the returned poly8x16x2_t.
test_vuzpq_p8(poly8x16_t a,poly8x16_t b)1247 poly8x16x2_t test_vuzpq_p8(poly8x16_t a, poly8x16_t b) {
1248   return vuzpq_p8(a, b);
1249 }
1250 
1251 // CHECK-LABEL: @test_vuzpq_p16(
1252 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1253 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1254 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
1255 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1256 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1257 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1258 // CHECK:   [[VUZP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
1259 // CHECK:   store <8 x i16> [[VUZP_I]], <8 x i16>* [[TMP3]]
1260 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1261 // CHECK:   [[VUZP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
1262 // CHECK:   store <8 x i16> [[VUZP1_I]], <8 x i16>* [[TMP4]]
1263 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
1264 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
1265 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1266 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1267 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
1268 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
// Test wrapper: returns vuzpq_p16(a, b) directly.
// The FileCheck expectations preceding this function look for two <8 x i16>
// shufflevectors of %a and %b: the even indices 0..14 first, then the odd
// indices 1..15, stored in that order into the returned poly16x8x2_t.
test_vuzpq_p16(poly16x8_t a,poly16x8_t b)1269 poly16x8x2_t test_vuzpq_p16(poly16x8_t a, poly16x8_t b) {
1270   return vuzpq_p16(a, b);
1271 }
1272 
1273 // CHECK-LABEL: @test_vzip_s8(
1274 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1275 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1276 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
1277 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1278 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1279 // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1280 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1281 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1282 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1283 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
1284 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
1285 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1286 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1287 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
1288 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Test wrapper: returns vzip_s8(a, b) directly.
// The FileCheck expectations preceding this function look for two <8 x i8>
// shufflevectors of %a and %b with masks <0, 8, 1, 9, 2, 10, 3, 11> and
// <4, 12, 5, 13, 6, 14, 7, 15>, stored in that order into the returned
// int8x8x2_t.
test_vzip_s8(int8x8_t a,int8x8_t b)1289 int8x8x2_t test_vzip_s8(int8x8_t a, int8x8_t b) {
1290   return vzip_s8(a, b);
1291 }
1292 
1293 // CHECK-LABEL: @test_vzip_s16(
1294 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1295 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1296 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
1297 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1298 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1299 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1300 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1301 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1302 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1303 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1304 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
1305 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
1306 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
1307 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
1308 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1309 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
1310 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
// Test wrapper: returns vzip_s16(a, b) directly.
// The FileCheck expectations preceding this function look for two <4 x i16>
// shufflevectors of %a and %b with masks <0, 4, 1, 5> and <2, 6, 3, 7>,
// stored in that order into the returned int16x4x2_t.
test_vzip_s16(int16x4_t a,int16x4_t b)1311 int16x4x2_t test_vzip_s16(int16x4_t a, int16x4_t b) {
1312   return vzip_s16(a, b);
1313 }
1314 
1315 // CHECK-LABEL: @test_vzip_s32(
1316 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1317 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1318 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
1319 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1320 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1321 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1322 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1323 // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
1324 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1325 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1326 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
1327 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
1328 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
1329 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
1330 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1331 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
1332 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
// Test wrapper: returns vzip_s32(a, b) directly.
// The FileCheck expectations preceding this function look for two <2 x i32>
// shufflevectors of %a and %b with masks <0, 2> and <1, 3>, stored in that
// order into the returned int32x2x2_t.
test_vzip_s32(int32x2_t a,int32x2_t b)1333 int32x2x2_t test_vzip_s32(int32x2_t a, int32x2_t b) {
1334   return vzip_s32(a, b);
1335 }
1336 
1337 // CHECK-LABEL: @test_vzip_u8(
1338 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1339 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1340 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
1341 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1342 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1343 // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1344 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1345 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1346 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1347 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
1348 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
1349 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1350 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1351 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
1352 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Test wrapper: returns vzip_u8(a, b) directly.
// The FileCheck expectations preceding this function look for two <8 x i8>
// shufflevectors of %a and %b with masks <0, 8, 1, 9, 2, 10, 3, 11> and
// <4, 12, 5, 13, 6, 14, 7, 15>, stored in that order into the returned
// uint8x8x2_t.
test_vzip_u8(uint8x8_t a,uint8x8_t b)1353 uint8x8x2_t test_vzip_u8(uint8x8_t a, uint8x8_t b) {
1354   return vzip_u8(a, b);
1355 }
1356 
1357 // CHECK-LABEL: @test_vzip_u16(
1358 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1359 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1360 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1361 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1362 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1363 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1364 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1365 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1366 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1367 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1368 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
1369 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1370 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1371 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
1372 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1373 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1374 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
// Test wrapper: returns vzip_u16(a, b) directly.
// The FileCheck expectations preceding this function look for two <4 x i16>
// shufflevectors of %a and %b with masks <0, 4, 1, 5> and <2, 6, 3, 7>,
// stored in that order into the returned uint16x4x2_t.
test_vzip_u16(uint16x4_t a,uint16x4_t b)1375 uint16x4x2_t test_vzip_u16(uint16x4_t a, uint16x4_t b) {
1376   return vzip_u16(a, b);
1377 }
1378 
1379 // CHECK-LABEL: @test_vzip_u32(
1380 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1381 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1382 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1383 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1384 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1385 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1386 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1387 // CHECK:   store <2 x i32> [[VZIP_I]], <2 x i32>* [[TMP3]]
1388 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1389 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1390 // CHECK:   store <2 x i32> [[VZIP1_I]], <2 x i32>* [[TMP4]]
1391 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1392 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1393 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1394 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1395 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1396 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
// Test wrapper: returns vzip_u32(a, b) directly.
// The FileCheck expectations preceding this function look for two <2 x i32>
// shufflevectors of %a and %b with masks <0, 2> and <1, 3>, stored in that
// order into the returned uint32x2x2_t.
test_vzip_u32(uint32x2_t a,uint32x2_t b)1397 uint32x2x2_t test_vzip_u32(uint32x2_t a, uint32x2_t b) {
1398   return vzip_u32(a, b);
1399 }
1400 
1401 // CHECK-LABEL: @test_vzip_f32(
1402 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1403 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1404 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1405 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1406 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1407 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1408 // CHECK:   [[VZIP_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1409 // CHECK:   store <2 x float> [[VZIP_I]], <2 x float>* [[TMP3]]
1410 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1411 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1412 // CHECK:   store <2 x float> [[VZIP1_I]], <2 x float>* [[TMP4]]
1413 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1414 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1415 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1416 // CHECK:   store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
1417 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1418 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
// Test wrapper: returns vzip_f32(a, b) directly.
// The FileCheck expectations preceding this function look for two <2 x float>
// shufflevectors of %a and %b with masks <0, 2> and <1, 3>, stored in that
// order into the returned float32x2x2_t.
test_vzip_f32(float32x2_t a,float32x2_t b)1419 float32x2x2_t test_vzip_f32(float32x2_t a, float32x2_t b) {
1420   return vzip_f32(a, b);
1421 }
1422 
1423 // CHECK-LABEL: @test_vzip_p8(
1424 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1425 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1426 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1427 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1428 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1429 // CHECK:   store <8 x i8> [[VZIP_I]], <8 x i8>* [[TMP1]]
1430 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1431 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1432 // CHECK:   store <8 x i8> [[VZIP1_I]], <8 x i8>* [[TMP2]]
1433 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1434 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1435 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1436 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1437 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1438 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Test wrapper: returns vzip_p8(a, b) directly.
// The FileCheck expectations preceding this function look for two <8 x i8>
// shufflevectors of %a and %b with masks <0, 8, 1, 9, 2, 10, 3, 11> and
// <4, 12, 5, 13, 6, 14, 7, 15>, stored in that order into the returned
// poly8x8x2_t.
test_vzip_p8(poly8x8_t a,poly8x8_t b)1439 poly8x8x2_t test_vzip_p8(poly8x8_t a, poly8x8_t b) {
1440   return vzip_p8(a, b);
1441 }
1442 
1443 // CHECK-LABEL: @test_vzip_p16(
1444 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1445 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1446 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
1447 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1448 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1449 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1450 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1451 // CHECK:   store <4 x i16> [[VZIP_I]], <4 x i16>* [[TMP3]]
1452 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1453 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1454 // CHECK:   store <4 x i16> [[VZIP1_I]], <4 x i16>* [[TMP4]]
1455 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
1456 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
1457 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1458 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1459 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
1460 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
// Test wrapper: returns vzip_p16(a, b) directly.
// The FileCheck expectations preceding this function look for two <4 x i16>
// shufflevectors of %a and %b with masks <0, 4, 1, 5> and <2, 6, 3, 7>,
// stored in that order into the returned poly16x4x2_t.
test_vzip_p16(poly16x4_t a,poly16x4_t b)1461 poly16x4x2_t test_vzip_p16(poly16x4_t a, poly16x4_t b) {
1462   return vzip_p16(a, b);
1463 }
1464 
1465 // CHECK-LABEL: @test_vzipq_s8(
1466 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1467 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1468 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
1469 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1470 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1471 // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1472 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1473 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1474 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1475 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
1476 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
1477 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1478 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1479 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
1480 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Test wrapper: returns vzipq_s8(a, b) directly.
// The FileCheck expectations preceding this function look for two <16 x i8>
// shufflevectors of %a and %b: mask <0, 16, 1, 17, ..., 7, 23> first, then
// mask <8, 24, 9, 25, ..., 15, 31>, stored in that order into the returned
// int8x16x2_t.
test_vzipq_s8(int8x16_t a,int8x16_t b)1481 int8x16x2_t test_vzipq_s8(int8x16_t a, int8x16_t b) {
1482   return vzipq_s8(a, b);
1483 }
1484 
1485 // CHECK-LABEL: @test_vzipq_s16(
1486 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1487 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1488 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
1489 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1490 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1491 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1492 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1493 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1494 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1495 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1496 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
1497 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
1498 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
1499 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1500 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1501 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
1502 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
// Test wrapper: returns vzipq_s16(a, b) directly.
// The FileCheck expectations preceding this function look for two <8 x i16>
// shufflevectors of %a and %b with masks <0, 8, 1, 9, 2, 10, 3, 11> and
// <4, 12, 5, 13, 6, 14, 7, 15>, stored in that order into the returned
// int16x8x2_t.
test_vzipq_s16(int16x8_t a,int16x8_t b)1503 int16x8x2_t test_vzipq_s16(int16x8_t a, int16x8_t b) {
1504   return vzipq_s16(a, b);
1505 }
1506 
1507 // CHECK-LABEL: @test_vzipq_s32(
1508 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1509 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1510 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
1511 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1512 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1513 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1514 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1515 // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
1516 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1517 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1518 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
1519 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
1520 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
1521 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1522 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1523 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
1524 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
// Test wrapper: returns vzipq_s32(a, b) directly.
// The FileCheck expectations preceding this function look for two <4 x i32>
// shufflevectors of %a and %b with masks <0, 4, 1, 5> and <2, 6, 3, 7>,
// stored in that order into the returned int32x4x2_t.
test_vzipq_s32(int32x4_t a,int32x4_t b)1525 int32x4x2_t test_vzipq_s32(int32x4_t a, int32x4_t b) {
1526   return vzipq_s32(a, b);
1527 }
1528 
1529 // CHECK-LABEL: @test_vzipq_u8(
1530 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1531 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1532 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
1533 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1534 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1535 // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1536 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1537 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1538 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1539 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
1540 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
1541 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1542 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1543 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
1544 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Test wrapper: returns vzipq_u8(a, b) directly.
// The FileCheck expectations preceding this function look for two <16 x i8>
// shufflevectors of %a and %b: mask <0, 16, 1, 17, ..., 7, 23> first, then
// mask <8, 24, 9, 25, ..., 15, 31>, stored in that order into the returned
// uint8x16x2_t.
test_vzipq_u8(uint8x16_t a,uint8x16_t b)1545 uint8x16x2_t test_vzipq_u8(uint8x16_t a, uint8x16_t b) {
1546   return vzipq_u8(a, b);
1547 }
1548 
1549 // CHECK-LABEL: @test_vzipq_u16(
1550 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1551 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1552 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
1553 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1554 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1555 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1556 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1557 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1558 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1559 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1560 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
1561 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
1562 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
1563 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1564 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1565 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
1566 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
// Returns vzipq_u16(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,8,1,9,2,10,3,11) and
// (4,12,5,13,6,14,7,15), each stored to one of two consecutive 8 x i16 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vzipq_u16(uint16x8_t a,uint16x8_t b)1567 uint16x8x2_t test_vzipq_u16(uint16x8_t a, uint16x8_t b) {
1568   return vzipq_u16(a, b);
1569 }
1570 
1571 // CHECK-LABEL: @test_vzipq_u32(
1572 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1573 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1574 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
1575 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1576 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1577 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1578 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1579 // CHECK:   store <4 x i32> [[VZIP_I]], <4 x i32>* [[TMP3]]
1580 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1581 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1582 // CHECK:   store <4 x i32> [[VZIP1_I]], <4 x i32>* [[TMP4]]
1583 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
1584 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
1585 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1586 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1587 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
1588 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
// Returns vzipq_u32(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,4,1,5) and (2,6,3,7), each stored
// to one of two consecutive 4 x i32 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vzipq_u32(uint32x4_t a,uint32x4_t b)1589 uint32x4x2_t test_vzipq_u32(uint32x4_t a, uint32x4_t b) {
1590   return vzipq_u32(a, b);
1591 }
1592 
1593 // CHECK-LABEL: @test_vzipq_f32(
1594 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1595 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1596 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
1597 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1598 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1599 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
1600 // CHECK:   [[VZIP_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
1601 // CHECK:   store <4 x float> [[VZIP_I]], <4 x float>* [[TMP3]]
1602 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
1603 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1604 // CHECK:   store <4 x float> [[VZIP1_I]], <4 x float>* [[TMP4]]
1605 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
1606 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
1607 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1608 // CHECK:   store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
1609 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
1610 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
// Returns vzipq_f32(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,4,1,5) and (2,6,3,7), each stored
// to one of two consecutive 4 x float slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vzipq_f32(float32x4_t a,float32x4_t b)1611 float32x4x2_t test_vzipq_f32(float32x4_t a, float32x4_t b) {
1612   return vzipq_f32(a, b);
1613 }
1614 
1615 // CHECK-LABEL: @test_vzipq_p8(
1616 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
1617 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
1618 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
1619 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1620 // CHECK:   [[VZIP_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
1621 // CHECK:   store <16 x i8> [[VZIP_I]], <16 x i8>* [[TMP1]]
1622 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1623 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
1624 // CHECK:   store <16 x i8> [[VZIP1_I]], <16 x i8>* [[TMP2]]
1625 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
1626 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
1627 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
1628 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1629 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
1630 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
// Returns vzipq_p8(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks
// (0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23) and
// (8,24,9,25,10,26,11,27,12,28,13,29,14,30,15,31), each stored to one of two
// consecutive 16 x i8 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vzipq_p8(poly8x16_t a,poly8x16_t b)1631 poly8x16x2_t test_vzipq_p8(poly8x16_t a, poly8x16_t b) {
1632   return vzipq_p8(a, b);
1633 }
1634 
1635 // CHECK-LABEL: @test_vzipq_p16(
1636 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
1637 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
1638 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
1639 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1640 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1641 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1642 // CHECK:   [[VZIP_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
1643 // CHECK:   store <8 x i16> [[VZIP_I]], <8 x i16>* [[TMP3]]
1644 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1645 // CHECK:   [[VZIP1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
1646 // CHECK:   store <8 x i16> [[VZIP1_I]], <8 x i16>* [[TMP4]]
1647 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
1648 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
1649 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
1650 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1651 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
1652 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
// Returns vzipq_p16(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,8,1,9,2,10,3,11) and
// (4,12,5,13,6,14,7,15), each stored to one of two consecutive 8 x i16 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vzipq_p16(poly16x8_t a,poly16x8_t b)1653 poly16x8x2_t test_vzipq_p16(poly16x8_t a, poly16x8_t b) {
1654   return vzipq_p16(a, b);
1655 }
1656 
1657 // CHECK-LABEL: @test_vtrn_s8(
1658 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x8x2_t, align 8
1659 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x8x2_t, align 8
1660 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x8x2_t* [[RETVAL_I]] to i8*
1661 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1662 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1663 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1664 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1665 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1666 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1667 // CHECK:   [[TMP5:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL_I]], align 8
1668 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], i32 0, i32 0
1669 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x8x2_t [[TMP5]], 0
1670 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1671 // CHECK:   [[TMP8:%.*]] = load %struct.int8x8x2_t, %struct.int8x8x2_t* [[RETVAL]], align 8
1672 // CHECK:   ret %struct.int8x8x2_t [[TMP8]]
// Returns vtrn_s8(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,8,2,10,4,12,6,14) and
// (1,9,3,11,5,13,7,15), each stored to one of two consecutive 8 x i8 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrn_s8(int8x8_t a,int8x8_t b)1673 int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
1674   return vtrn_s8(a, b);
1675 }
1676 
1677 // CHECK-LABEL: @test_vtrn_s16(
1678 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x4x2_t, align 8
1679 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x4x2_t, align 8
1680 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x4x2_t* [[RETVAL_I]] to i8*
1681 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1682 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1683 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1684 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1685 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
1686 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1687 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1688 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
1689 // CHECK:   [[TMP7:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL_I]], align 8
1690 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], i32 0, i32 0
1691 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x4x2_t [[TMP7]], 0
1692 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1693 // CHECK:   [[TMP10:%.*]] = load %struct.int16x4x2_t, %struct.int16x4x2_t* [[RETVAL]], align 8
1694 // CHECK:   ret %struct.int16x4x2_t [[TMP10]]
// Returns vtrn_s16(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,4,2,6) and (1,5,3,7), each stored
// to one of two consecutive 4 x i16 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrn_s16(int16x4_t a,int16x4_t b)1695 int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
1696   return vtrn_s16(a, b);
1697 }
1698 
1699 // CHECK-LABEL: @test_vtrn_s32(
1700 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x2x2_t, align 8
1701 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x2x2_t, align 8
1702 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x2x2_t* [[RETVAL_I]] to i8*
1703 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1704 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1705 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1706 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1707 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
1708 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1709 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1710 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
1711 // CHECK:   [[TMP7:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL_I]], align 8
1712 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], i32 0, i32 0
1713 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x2x2_t [[TMP7]], 0
1714 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1715 // CHECK:   [[TMP10:%.*]] = load %struct.int32x2x2_t, %struct.int32x2x2_t* [[RETVAL]], align 8
1716 // CHECK:   ret %struct.int32x2x2_t [[TMP10]]
// Returns vtrn_s32(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,2) and (1,3), each stored to one of
// two consecutive 2 x i32 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrn_s32(int32x2_t a,int32x2_t b)1717 int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
1718   return vtrn_s32(a, b);
1719 }
1720 
1721 // CHECK-LABEL: @test_vtrn_u8(
1722 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x8x2_t, align 8
1723 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x8x2_t, align 8
1724 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x8x2_t* [[RETVAL_I]] to i8*
1725 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1726 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1727 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1728 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1729 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1730 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1731 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL_I]], align 8
1732 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], i32 0, i32 0
1733 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x8x2_t [[TMP5]], 0
1734 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1735 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x8x2_t, %struct.uint8x8x2_t* [[RETVAL]], align 8
1736 // CHECK:   ret %struct.uint8x8x2_t [[TMP8]]
// Returns vtrn_u8(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,8,2,10,4,12,6,14) and
// (1,9,3,11,5,13,7,15), each stored to one of two consecutive 8 x i8 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrn_u8(uint8x8_t a,uint8x8_t b)1737 uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
1738   return vtrn_u8(a, b);
1739 }
1740 
1741 // CHECK-LABEL: @test_vtrn_u16(
1742 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x4x2_t, align 8
1743 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x4x2_t, align 8
1744 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x4x2_t* [[RETVAL_I]] to i8*
1745 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1746 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1747 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1748 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1749 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
1750 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1751 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1752 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
1753 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL_I]], align 8
1754 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], i32 0, i32 0
1755 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x4x2_t [[TMP7]], 0
1756 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1757 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x4x2_t, %struct.uint16x4x2_t* [[RETVAL]], align 8
1758 // CHECK:   ret %struct.uint16x4x2_t [[TMP10]]
// Returns vtrn_u16(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,4,2,6) and (1,5,3,7), each stored
// to one of two consecutive 4 x i16 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrn_u16(uint16x4_t a,uint16x4_t b)1759 uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
1760   return vtrn_u16(a, b);
1761 }
1762 
1763 // CHECK-LABEL: @test_vtrn_u32(
1764 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x2x2_t, align 8
1765 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x2x2_t, align 8
1766 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x2x2_t* [[RETVAL_I]] to i8*
1767 // CHECK:   [[TMP1:%.*]] = bitcast <2 x i32> %a to <8 x i8>
1768 // CHECK:   [[TMP2:%.*]] = bitcast <2 x i32> %b to <8 x i8>
1769 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x i32>*
1770 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 0, i32 2>
1771 // CHECK:   store <2 x i32> [[VTRN_I]], <2 x i32>* [[TMP3]]
1772 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x i32>, <2 x i32>* [[TMP3]], i32 1
1773 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x i32> %a, <2 x i32> %b, <2 x i32> <i32 1, i32 3>
1774 // CHECK:   store <2 x i32> [[VTRN1_I]], <2 x i32>* [[TMP4]]
1775 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL_I]], align 8
1776 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], i32 0, i32 0
1777 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x2x2_t [[TMP7]], 0
1778 // CHECK:   store [2 x <2 x i32>] [[TMP9]], [2 x <2 x i32>]* [[TMP8]], align 8
1779 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x2x2_t, %struct.uint32x2x2_t* [[RETVAL]], align 8
1780 // CHECK:   ret %struct.uint32x2x2_t [[TMP10]]
// Returns vtrn_u32(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,2) and (1,3), each stored to one of
// two consecutive 2 x i32 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrn_u32(uint32x2_t a,uint32x2_t b)1781 uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
1782   return vtrn_u32(a, b);
1783 }
1784 
1785 // CHECK-LABEL: @test_vtrn_f32(
1786 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x2x2_t, align 8
1787 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x2x2_t, align 8
1788 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x2x2_t* [[RETVAL_I]] to i8*
1789 // CHECK:   [[TMP1:%.*]] = bitcast <2 x float> %a to <8 x i8>
1790 // CHECK:   [[TMP2:%.*]] = bitcast <2 x float> %b to <8 x i8>
1791 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <2 x float>*
1792 // CHECK:   [[VTRN_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 0, i32 2>
1793 // CHECK:   store <2 x float> [[VTRN_I]], <2 x float>* [[TMP3]]
1794 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <2 x float>, <2 x float>* [[TMP3]], i32 1
1795 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <2 x float> %a, <2 x float> %b, <2 x i32> <i32 1, i32 3>
1796 // CHECK:   store <2 x float> [[VTRN1_I]], <2 x float>* [[TMP4]]
1797 // CHECK:   [[TMP7:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL_I]], align 8
1798 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], i32 0, i32 0
1799 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x2x2_t [[TMP7]], 0
1800 // CHECK:   store [2 x <2 x float>] [[TMP9]], [2 x <2 x float>]* [[TMP8]], align 8
1801 // CHECK:   [[TMP10:%.*]] = load %struct.float32x2x2_t, %struct.float32x2x2_t* [[RETVAL]], align 8
1802 // CHECK:   ret %struct.float32x2x2_t [[TMP10]]
// Returns vtrn_f32(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,2) and (1,3), each stored to one of
// two consecutive 2 x float slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrn_f32(float32x2_t a,float32x2_t b)1803 float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
1804   return vtrn_f32(a, b);
1805 }
1806 
1807 // CHECK-LABEL: @test_vtrn_p8(
1808 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x8x2_t, align 8
1809 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x8x2_t, align 8
1810 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x8x2_t* [[RETVAL_I]] to i8*
1811 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <8 x i8>*
1812 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1813 // CHECK:   store <8 x i8> [[VTRN_I]], <8 x i8>* [[TMP1]]
1814 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <8 x i8>, <8 x i8>* [[TMP1]], i32 1
1815 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1816 // CHECK:   store <8 x i8> [[VTRN1_I]], <8 x i8>* [[TMP2]]
1817 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL_I]], align 8
1818 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], i32 0, i32 0
1819 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x8x2_t [[TMP5]], 0
1820 // CHECK:   store [2 x <8 x i8>] [[TMP7]], [2 x <8 x i8>]* [[TMP6]], align 8
1821 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x8x2_t, %struct.poly8x8x2_t* [[RETVAL]], align 8
1822 // CHECK:   ret %struct.poly8x8x2_t [[TMP8]]
// Returns vtrn_p8(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,8,2,10,4,12,6,14) and
// (1,9,3,11,5,13,7,15), each stored to one of two consecutive 8 x i8 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrn_p8(poly8x8_t a,poly8x8_t b)1823 poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
1824   return vtrn_p8(a, b);
1825 }
1826 
1827 // CHECK-LABEL: @test_vtrn_p16(
1828 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x4x2_t, align 8
1829 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x4x2_t, align 8
1830 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x4x2_t* [[RETVAL_I]] to i8*
1831 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i16> %a to <8 x i8>
1832 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i16> %b to <8 x i8>
1833 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i16>*
1834 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1835 // CHECK:   store <4 x i16> [[VTRN_I]], <4 x i16>* [[TMP3]]
1836 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i16>, <4 x i16>* [[TMP3]], i32 1
1837 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1838 // CHECK:   store <4 x i16> [[VTRN1_I]], <4 x i16>* [[TMP4]]
1839 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL_I]], align 8
1840 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], i32 0, i32 0
1841 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x4x2_t [[TMP7]], 0
1842 // CHECK:   store [2 x <4 x i16>] [[TMP9]], [2 x <4 x i16>]* [[TMP8]], align 8
1843 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x4x2_t, %struct.poly16x4x2_t* [[RETVAL]], align 8
1844 // CHECK:   ret %struct.poly16x4x2_t [[TMP10]]
// Returns vtrn_p16(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,4,2,6) and (1,5,3,7), each stored
// to one of two consecutive 4 x i16 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrn_p16(poly16x4_t a,poly16x4_t b)1845 poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
1846   return vtrn_p16(a, b);
1847 }
1848 
1849 // CHECK-LABEL: @test_vtrnq_s8(
1850 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int8x16x2_t, align 16
1851 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int8x16x2_t, align 16
1852 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int8x16x2_t* [[RETVAL_I]] to i8*
1853 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1854 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
1855 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
1856 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1857 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
1858 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
1859 // CHECK:   [[TMP5:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL_I]], align 16
1860 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], i32 0, i32 0
1861 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.int8x16x2_t [[TMP5]], 0
1862 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1863 // CHECK:   [[TMP8:%.*]] = load %struct.int8x16x2_t, %struct.int8x16x2_t* [[RETVAL]], align 16
1864 // CHECK:   ret %struct.int8x16x2_t [[TMP8]]
// Returns vtrnq_s8(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks
// (0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30) and
// (1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31), each stored to one of two
// consecutive 16 x i8 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrnq_s8(int8x16_t a,int8x16_t b)1865 int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
1866   return vtrnq_s8(a, b);
1867 }
1868 
1869 // CHECK-LABEL: @test_vtrnq_s16(
1870 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int16x8x2_t, align 16
1871 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int16x8x2_t, align 16
1872 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int16x8x2_t* [[RETVAL_I]] to i8*
1873 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1874 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1875 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1876 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1877 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
1878 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1879 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1880 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
1881 // CHECK:   [[TMP7:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL_I]], align 16
1882 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], i32 0, i32 0
1883 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int16x8x2_t [[TMP7]], 0
1884 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1885 // CHECK:   [[TMP10:%.*]] = load %struct.int16x8x2_t, %struct.int16x8x2_t* [[RETVAL]], align 16
1886 // CHECK:   ret %struct.int16x8x2_t [[TMP10]]
// Returns vtrnq_s16(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,8,2,10,4,12,6,14) and
// (1,9,3,11,5,13,7,15), each stored to one of two consecutive 8 x i16 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrnq_s16(int16x8_t a,int16x8_t b)1887 int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
1888   return vtrnq_s16(a, b);
1889 }
1890 
1891 // CHECK-LABEL: @test_vtrnq_s32(
1892 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.int32x4x2_t, align 16
1893 // CHECK:   [[RETVAL:%.*]] = alloca %struct.int32x4x2_t, align 16
1894 // CHECK:   [[TMP0:%.*]] = bitcast %struct.int32x4x2_t* [[RETVAL_I]] to i8*
1895 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1896 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1897 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1898 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1899 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
1900 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1901 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1902 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
1903 // CHECK:   [[TMP7:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL_I]], align 16
1904 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], i32 0, i32 0
1905 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.int32x4x2_t [[TMP7]], 0
1906 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1907 // CHECK:   [[TMP10:%.*]] = load %struct.int32x4x2_t, %struct.int32x4x2_t* [[RETVAL]], align 16
1908 // CHECK:   ret %struct.int32x4x2_t [[TMP10]]
// Returns vtrnq_s32(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,4,2,6) and (1,5,3,7), each stored
// to one of two consecutive 4 x i32 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrnq_s32(int32x4_t a,int32x4_t b)1909 int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
1910   return vtrnq_s32(a, b);
1911 }
1912 
1913 // CHECK-LABEL: @test_vtrnq_u8(
1914 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint8x16x2_t, align 16
1915 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint8x16x2_t, align 16
1916 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint8x16x2_t* [[RETVAL_I]] to i8*
1917 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
1918 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
1919 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
1920 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
1921 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
1922 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
1923 // CHECK:   [[TMP5:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL_I]], align 16
1924 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], i32 0, i32 0
1925 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.uint8x16x2_t [[TMP5]], 0
1926 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
1927 // CHECK:   [[TMP8:%.*]] = load %struct.uint8x16x2_t, %struct.uint8x16x2_t* [[RETVAL]], align 16
1928 // CHECK:   ret %struct.uint8x16x2_t [[TMP8]]
// Returns vtrnq_u8(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks
// (0,16,2,18,4,20,6,22,8,24,10,26,12,28,14,30) and
// (1,17,3,19,5,21,7,23,9,25,11,27,13,29,15,31), each stored to one of two
// consecutive 16 x i8 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrnq_u8(uint8x16_t a,uint8x16_t b)1929 uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
1930   return vtrnq_u8(a, b);
1931 }
1932 
1933 // CHECK-LABEL: @test_vtrnq_u16(
1934 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint16x8x2_t, align 16
1935 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint16x8x2_t, align 16
1936 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint16x8x2_t* [[RETVAL_I]] to i8*
1937 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
1938 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
1939 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
1940 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1941 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
1942 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
1943 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
1944 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
1945 // CHECK:   [[TMP7:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL_I]], align 16
1946 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], i32 0, i32 0
1947 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint16x8x2_t [[TMP7]], 0
1948 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
1949 // CHECK:   [[TMP10:%.*]] = load %struct.uint16x8x2_t, %struct.uint16x8x2_t* [[RETVAL]], align 16
1950 // CHECK:   ret %struct.uint16x8x2_t [[TMP10]]
// Returns vtrnq_u16(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,8,2,10,4,12,6,14) and
// (1,9,3,11,5,13,7,15), each stored to one of two consecutive 8 x i16 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrnq_u16(uint16x8_t a,uint16x8_t b)1951 uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
1952   return vtrnq_u16(a, b);
1953 }
1954 
1955 // CHECK-LABEL: @test_vtrnq_u32(
1956 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.uint32x4x2_t, align 16
1957 // CHECK:   [[RETVAL:%.*]] = alloca %struct.uint32x4x2_t, align 16
1958 // CHECK:   [[TMP0:%.*]] = bitcast %struct.uint32x4x2_t* [[RETVAL_I]] to i8*
1959 // CHECK:   [[TMP1:%.*]] = bitcast <4 x i32> %a to <16 x i8>
1960 // CHECK:   [[TMP2:%.*]] = bitcast <4 x i32> %b to <16 x i8>
1961 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x i32>*
1962 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1963 // CHECK:   store <4 x i32> [[VTRN_I]], <4 x i32>* [[TMP3]]
1964 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x i32>, <4 x i32>* [[TMP3]], i32 1
1965 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1966 // CHECK:   store <4 x i32> [[VTRN1_I]], <4 x i32>* [[TMP4]]
1967 // CHECK:   [[TMP7:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL_I]], align 16
1968 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], i32 0, i32 0
1969 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.uint32x4x2_t [[TMP7]], 0
1970 // CHECK:   store [2 x <4 x i32>] [[TMP9]], [2 x <4 x i32>]* [[TMP8]], align 16
1971 // CHECK:   [[TMP10:%.*]] = load %struct.uint32x4x2_t, %struct.uint32x4x2_t* [[RETVAL]], align 16
1972 // CHECK:   ret %struct.uint32x4x2_t [[TMP10]]
// Returns vtrnq_u32(a, b) unchanged. The FileCheck directives above expect two
// shufflevector ops on %a/%b with masks (0,4,2,6) and (1,5,3,7), each stored
// to one of two consecutive 4 x i32 slots.
// NOTE(review): presumably those directives need regenerating if this body changes.
test_vtrnq_u32(uint32x4_t a,uint32x4_t b)1973 uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
1974   return vtrnq_u32(a, b);
1975 }
1976 
1977 // CHECK-LABEL: @test_vtrnq_f32(
1978 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.float32x4x2_t, align 16
1979 // CHECK:   [[RETVAL:%.*]] = alloca %struct.float32x4x2_t, align 16
1980 // CHECK:   [[TMP0:%.*]] = bitcast %struct.float32x4x2_t* [[RETVAL_I]] to i8*
1981 // CHECK:   [[TMP1:%.*]] = bitcast <4 x float> %a to <16 x i8>
1982 // CHECK:   [[TMP2:%.*]] = bitcast <4 x float> %b to <16 x i8>
1983 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <4 x float>*
1984 // CHECK:   [[VTRN_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1985 // CHECK:   store <4 x float> [[VTRN_I]], <4 x float>* [[TMP3]]
1986 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <4 x float>, <4 x float>* [[TMP3]], i32 1
1987 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
1988 // CHECK:   store <4 x float> [[VTRN1_I]], <4 x float>* [[TMP4]]
1989 // CHECK:   [[TMP7:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL_I]], align 16
1990 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], i32 0, i32 0
1991 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.float32x4x2_t [[TMP7]], 0
1992 // CHECK:   store [2 x <4 x float>] [[TMP9]], [2 x <4 x float>]* [[TMP8]], align 16
1993 // CHECK:   [[TMP10:%.*]] = load %struct.float32x4x2_t, %struct.float32x4x2_t* [[RETVAL]], align 16
1994 // CHECK:   ret %struct.float32x4x2_t [[TMP10]]
test_vtrnq_f32(float32x4_t a,float32x4_t b)1995 float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
1996   return vtrnq_f32(a, b);
1997 }
1998 
1999 // CHECK-LABEL: @test_vtrnq_p8(
2000 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly8x16x2_t, align 16
2001 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly8x16x2_t, align 16
2002 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly8x16x2_t* [[RETVAL_I]] to i8*
2003 // CHECK:   [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <16 x i8>*
2004 // CHECK:   [[VTRN_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
2005 // CHECK:   store <16 x i8> [[VTRN_I]], <16 x i8>* [[TMP1]]
2006 // CHECK:   [[TMP2:%.*]] = getelementptr inbounds <16 x i8>, <16 x i8>* [[TMP1]], i32 1
2007 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
2008 // CHECK:   store <16 x i8> [[VTRN1_I]], <16 x i8>* [[TMP2]]
2009 // CHECK:   [[TMP5:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL_I]], align 16
2010 // CHECK:   [[TMP6:%.*]] = getelementptr inbounds %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], i32 0, i32 0
2011 // CHECK:   [[TMP7:%.*]] = extractvalue %struct.poly8x16x2_t [[TMP5]], 0
2012 // CHECK:   store [2 x <16 x i8>] [[TMP7]], [2 x <16 x i8>]* [[TMP6]], align 16
2013 // CHECK:   [[TMP8:%.*]] = load %struct.poly8x16x2_t, %struct.poly8x16x2_t* [[RETVAL]], align 16
2014 // CHECK:   ret %struct.poly8x16x2_t [[TMP8]]
test_vtrnq_p8(poly8x16_t a,poly8x16_t b)2015 poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
2016   return vtrnq_p8(a, b);
2017 }
2018 
2019 // CHECK-LABEL: @test_vtrnq_p16(
2020 // CHECK:   [[RETVAL_I:%.*]] = alloca %struct.poly16x8x2_t, align 16
2021 // CHECK:   [[RETVAL:%.*]] = alloca %struct.poly16x8x2_t, align 16
2022 // CHECK:   [[TMP0:%.*]] = bitcast %struct.poly16x8x2_t* [[RETVAL_I]] to i8*
2023 // CHECK:   [[TMP1:%.*]] = bitcast <8 x i16> %a to <16 x i8>
2024 // CHECK:   [[TMP2:%.*]] = bitcast <8 x i16> %b to <16 x i8>
2025 // CHECK:   [[TMP3:%.*]] = bitcast i8* [[TMP0]] to <8 x i16>*
2026 // CHECK:   [[VTRN_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
2027 // CHECK:   store <8 x i16> [[VTRN_I]], <8 x i16>* [[TMP3]]
2028 // CHECK:   [[TMP4:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[TMP3]], i32 1
2029 // CHECK:   [[VTRN1_I:%.*]] = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2030 // CHECK:   store <8 x i16> [[VTRN1_I]], <8 x i16>* [[TMP4]]
2031 // CHECK:   [[TMP7:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL_I]], align 16
2032 // CHECK:   [[TMP8:%.*]] = getelementptr inbounds %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], i32 0, i32 0
2033 // CHECK:   [[TMP9:%.*]] = extractvalue %struct.poly16x8x2_t [[TMP7]], 0
2034 // CHECK:   store [2 x <8 x i16>] [[TMP9]], [2 x <8 x i16>]* [[TMP8]], align 16
2035 // CHECK:   [[TMP10:%.*]] = load %struct.poly16x8x2_t, %struct.poly16x8x2_t* [[RETVAL]], align 16
2036 // CHECK:   ret %struct.poly16x8x2_t [[TMP10]]
test_vtrnq_p16(poly16x8_t a,poly16x8_t b)2037 poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
2038   return vtrnq_p16(a, b);
2039 }
2040