1 // RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
2 // RUN: -ffp-contract=fast -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \
3 // RUN: | FileCheck %s
4
5 // Test IR generation for the AArch64 NEON intrinsics that operate on poly64 types
6
7 #include <arm_neon.h>
8
9 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vceq_p64(<1 x i64> %a, <1 x i64> %b) #0 {
10 // CHECK: [[CMP_I:%.*]] = icmp eq <1 x i64> %a, %b
11 // CHECK: [[SEXT_I:%.*]] = sext <1 x i1> [[CMP_I]] to <1 x i64>
12 // CHECK: ret <1 x i64> [[SEXT_I]]
// Calls vceq_p64 on two poly64x1_t operands. The directives above expect an
// `icmp eq` on <1 x i64> whose <1 x i1> result is sign-extended and returned.
test_vceq_p64(poly64x1_t a,poly64x1_t b)13 uint64x1_t test_vceq_p64(poly64x1_t a, poly64x1_t b) {
14 return vceq_p64(a, b);
15 }
16
17 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vceqq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
18 // CHECK: [[CMP_I:%.*]] = icmp eq <2 x i64> %a, %b
19 // CHECK: [[SEXT_I:%.*]] = sext <2 x i1> [[CMP_I]] to <2 x i64>
20 // CHECK: ret <2 x i64> [[SEXT_I]]
// Calls vceqq_p64 on two poly64x2_t operands. The directives above expect an
// `icmp eq` on <2 x i64> whose <2 x i1> result is sign-extended and returned.
test_vceqq_p64(poly64x2_t a,poly64x2_t b)21 uint64x2_t test_vceqq_p64(poly64x2_t a, poly64x2_t b) {
22 return vceqq_p64(a, b);
23 }
24
25 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vtst_p64(<1 x i64> %a, <1 x i64> %b) #0 {
26 // CHECK: [[TMP4:%.*]] = and <1 x i64> %a, %b
27 // CHECK: [[TMP5:%.*]] = icmp ne <1 x i64> [[TMP4]], zeroinitializer
28 // CHECK: [[VTST_I:%.*]] = sext <1 x i1> [[TMP5]] to <1 x i64>
29 // CHECK: ret <1 x i64> [[VTST_I]]
// Calls vtst_p64 on two poly64x1_t operands. The directives above expect
// `and` of the inputs, `icmp ne` against zeroinitializer, then a sign-extension
// of the <1 x i1> result to <1 x i64>.
test_vtst_p64(poly64x1_t a,poly64x1_t b)30 uint64x1_t test_vtst_p64(poly64x1_t a, poly64x1_t b) {
31 return vtst_p64(a, b);
32 }
33
34 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vtstq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
35 // CHECK: [[TMP4:%.*]] = and <2 x i64> %a, %b
36 // CHECK: [[TMP5:%.*]] = icmp ne <2 x i64> [[TMP4]], zeroinitializer
37 // CHECK: [[VTST_I:%.*]] = sext <2 x i1> [[TMP5]] to <2 x i64>
38 // CHECK: ret <2 x i64> [[VTST_I]]
// Calls vtstq_p64 on two poly64x2_t operands. The directives above expect
// `and` of the inputs, `icmp ne` against zeroinitializer, then a sign-extension
// of the <2 x i1> result to <2 x i64>.
test_vtstq_p64(poly64x2_t a,poly64x2_t b)39 uint64x2_t test_vtstq_p64(poly64x2_t a, poly64x2_t b) {
40 return vtstq_p64(a, b);
41 }
42
43 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vbsl_p64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) #0 {
44 // CHECK: [[VBSL3_I:%.*]] = and <1 x i64> %a, %b
45 // CHECK: [[TMP3:%.*]] = xor <1 x i64> %a, <i64 -1>
46 // CHECK: [[VBSL4_I:%.*]] = and <1 x i64> [[TMP3]], %c
47 // CHECK: [[VBSL5_I:%.*]] = or <1 x i64> [[VBSL3_I]], [[VBSL4_I]]
48 // CHECK: ret <1 x i64> [[VBSL5_I]]
// Calls vbsl_p64(a, b, c). The directives above expect the bitwise select to be
// open-coded on <1 x i64> as (a & b) | ((a ^ -1) & c).
test_vbsl_p64(poly64x1_t a,poly64x1_t b,poly64x1_t c)49 poly64x1_t test_vbsl_p64(poly64x1_t a, poly64x1_t b, poly64x1_t c) {
50 return vbsl_p64(a, b, c);
51 }
52
53 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vbslq_p64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) #1 {
54 // CHECK: [[VBSL3_I:%.*]] = and <2 x i64> %a, %b
55 // CHECK: [[TMP3:%.*]] = xor <2 x i64> %a, <i64 -1, i64 -1>
56 // CHECK: [[VBSL4_I:%.*]] = and <2 x i64> [[TMP3]], %c
57 // CHECK: [[VBSL5_I:%.*]] = or <2 x i64> [[VBSL3_I]], [[VBSL4_I]]
58 // CHECK: ret <2 x i64> [[VBSL5_I]]
// Calls vbslq_p64(a, b, c). The directives above expect the bitwise select to
// be open-coded on <2 x i64> as (a & b) | ((a ^ -1) & c).
test_vbslq_p64(poly64x2_t a,poly64x2_t b,poly64x2_t c)59 poly64x2_t test_vbslq_p64(poly64x2_t a, poly64x2_t b, poly64x2_t c) {
60 return vbslq_p64(a, b, c);
61 }
62
63 // CHECK-LABEL: define{{.*}} i64 @test_vget_lane_p64(<1 x i64> %v) #0 {
64 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %v, i32 0
65 // CHECK: ret i64 [[VGET_LANE]]
// Reads lane 0 of a poly64x1_t via vget_lane_p64. The directives above expect a
// single `extractelement` at index 0 whose i64 result is returned.
test_vget_lane_p64(poly64x1_t v)66 poly64_t test_vget_lane_p64(poly64x1_t v) {
67 return vget_lane_p64(v, 0);
68 }
69
70 // CHECK-LABEL: define{{.*}} i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 {
71 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %v, i32 1
72 // CHECK: ret i64 [[VGETQ_LANE]]
// Reads lane 1 of a poly64x2_t via vgetq_lane_p64. The directives above expect
// a single `extractelement` at index 1 whose i64 result is returned.
test_vgetq_lane_p64(poly64x2_t v)73 poly64_t test_vgetq_lane_p64(poly64x2_t v) {
74 return vgetq_lane_p64(v, 1);
75 }
76
77 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vset_lane_p64(i64 %a, <1 x i64> %v) #0 {
78 // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %v, i64 %a, i32 0
79 // CHECK: ret <1 x i64> [[VSET_LANE]]
// Writes scalar `a` into lane 0 of `v` via vset_lane_p64. The directives above
// expect a single `insertelement` into <1 x i64> at index 0.
test_vset_lane_p64(poly64_t a,poly64x1_t v)80 poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) {
81 return vset_lane_p64(a, v, 0);
82 }
83
84 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 {
85 // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %v, i64 %a, i32 1
86 // CHECK: ret <2 x i64> [[VSET_LANE]]
// Writes scalar `a` into lane 1 of `v` via vsetq_lane_p64. The directives above
// expect a single `insertelement` into <2 x i64> at index 1.
test_vsetq_lane_p64(poly64_t a,poly64x2_t v)87 poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) {
88 return vsetq_lane_p64(a, v, 1);
89 }
90
91 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vcopy_lane_p64(<1 x i64> %a, <1 x i64> %b) #0 {
92 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
93 // CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %a, i64 [[VGET_LANE]], i32 0
94 // CHECK: ret <1 x i64> [[VSET_LANE]]
// Copies lane 0 of `b` into lane 0 of `a` via vcopy_lane_p64. The directives
// above expect an `extractelement` from %b feeding an `insertelement` into %a.
test_vcopy_lane_p64(poly64x1_t a,poly64x1_t b)95 poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) {
96 return vcopy_lane_p64(a, 0, b, 0);
97
98 }
99
100 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 {
101 // CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0
102 // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGET_LANE]], i32 1
103 // CHECK: ret <2 x i64> [[VSET_LANE]]
// Copies lane 0 of the 64-bit vector `b` into lane 1 of the 128-bit vector `a`
// via vcopyq_lane_p64. The directives above expect an `extractelement` from
// <1 x i64> %b feeding an `insertelement` into <2 x i64> %a at index 1.
test_vcopyq_lane_p64(poly64x2_t a,poly64x1_t b)104 poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) {
105 return vcopyq_lane_p64(a, 1, b, 0);
106 }
107
108 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
109 // CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %b, i32 1
110 // CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGETQ_LANE]], i32 1
111 // CHECK: ret <2 x i64> [[VSET_LANE]]
// Copies lane 1 of `b` into lane 1 of `a` via vcopyq_laneq_p64. The directives
// above expect an `extractelement` from <2 x i64> %b at index 1 feeding an
// `insertelement` into <2 x i64> %a at index 1.
test_vcopyq_laneq_p64(poly64x2_t a,poly64x2_t b)112 poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) {
113 return vcopyq_laneq_p64(a, 1, b, 1);
114 }
115
116 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vcreate_p64(i64 %a) #0 {
117 // CHECK: [[TMP0:%.*]] = bitcast i64 %a to <1 x i64>
118 // CHECK: ret <1 x i64> [[TMP0]]
// Builds a poly64x1_t from a uint64_t via vcreate_p64. The directives above
// expect a plain `bitcast` from i64 to <1 x i64>.
test_vcreate_p64(uint64_t a)119 poly64x1_t test_vcreate_p64(uint64_t a) {
120 return vcreate_p64(a);
121 }
122
123 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vdup_n_p64(i64 %a) #0 {
124 // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
125 // CHECK: ret <1 x i64> [[VECINIT_I]]
// Broadcasts scalar `a` into a poly64x1_t via vdup_n_p64. The directives above
// expect one `insertelement` into an undef <1 x i64> at index 0.
test_vdup_n_p64(poly64_t a)126 poly64x1_t test_vdup_n_p64(poly64_t a) {
127 return vdup_n_p64(a);
128 }
129 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_n_p64(i64 %a) #1 {
130 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
131 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
132 // CHECK: ret <2 x i64> [[VECINIT1_I]]
// Broadcasts scalar `a` into a poly64x2_t via vdupq_n_p64. The directives above
// expect two chained `insertelement`s filling indices 0 and 1 of <2 x i64>.
test_vdupq_n_p64(poly64_t a)133 poly64x2_t test_vdupq_n_p64(poly64_t a) {
134 return vdupq_n_p64(a);
135 }
136
137 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vmov_n_p64(i64 %a) #0 {
138 // CHECK: [[VECINIT_I:%.*]] = insertelement <1 x i64> undef, i64 %a, i32 0
139 // CHECK: ret <1 x i64> [[VECINIT_I]]
// Broadcasts scalar `a` into a poly64x1_t via vmov_n_p64. The directives above
// expect one `insertelement` into an undef <1 x i64> at index 0, the same IR
// shape expected for the vdup_n_p64 test in this file.
test_vmov_n_p64(poly64_t a)140 poly64x1_t test_vmov_n_p64(poly64_t a) {
141 return vmov_n_p64(a);
142 }
143
144 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vmovq_n_p64(i64 %a) #1 {
145 // CHECK: [[VECINIT_I:%.*]] = insertelement <2 x i64> undef, i64 %a, i32 0
146 // CHECK: [[VECINIT1_I:%.*]] = insertelement <2 x i64> [[VECINIT_I]], i64 %a, i32 1
147 // CHECK: ret <2 x i64> [[VECINIT1_I]]
// Broadcasts scalar `a` into a poly64x2_t via vmovq_n_p64. The directives above
// expect two chained `insertelement`s filling indices 0 and 1 of <2 x i64>.
test_vmovq_n_p64(poly64_t a)148 poly64x2_t test_vmovq_n_p64(poly64_t a) {
149 return vmovq_n_p64(a);
150 }
151
152 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vdup_lane_p64(<1 x i64> %vec) #0 {
153 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
154 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
155 // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <1 x i32> zeroinitializer
156 // CHECK: ret <1 x i64> [[LANE]]
// Duplicates lane 0 of a poly64x1_t via vdup_lane_p64. The directives above
// expect a bitcast round trip through <8 x i8> followed by a `shufflevector`
// with a zeroinitializer <1 x i32> mask.
test_vdup_lane_p64(poly64x1_t vec)157 poly64x1_t test_vdup_lane_p64(poly64x1_t vec) {
158 return vdup_lane_p64(vec, 0);
159 }
160
161 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_lane_p64(<1 x i64> %vec) #1 {
162 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> [[VEC:%.*]] to <8 x i8>
163 // CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
164 // CHECK: [[LANE:%.*]] = shufflevector <1 x i64> [[TMP1]], <1 x i64> [[TMP1]], <2 x i32> zeroinitializer
165 // CHECK: ret <2 x i64> [[LANE]]
// Duplicates lane 0 of a poly64x1_t into both lanes of a poly64x2_t via
// vdupq_lane_p64. The directives above expect a bitcast round trip through
// <8 x i8> followed by a `shufflevector` with a zeroinitializer <2 x i32> mask.
test_vdupq_lane_p64(poly64x1_t vec)166 poly64x2_t test_vdupq_lane_p64(poly64x1_t vec) {
167 return vdupq_lane_p64(vec, 0);
168 }
169
170 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vdupq_laneq_p64(<2 x i64> %vec) #1 {
171 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> [[VEC:%.*]] to <16 x i8>
172 // CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
173 // CHECK: [[LANE:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> [[TMP1]], <2 x i32> <i32 1, i32 1>
174 // CHECK: ret <2 x i64> [[LANE]]
// Duplicates lane 1 of a poly64x2_t into both lanes via vdupq_laneq_p64. The
// directives above expect a bitcast round trip through <16 x i8> followed by a
// `shufflevector` with mask <i32 1, i32 1>.
test_vdupq_laneq_p64(poly64x2_t vec)175 poly64x2_t test_vdupq_laneq_p64(poly64x2_t vec) {
176 return vdupq_laneq_p64(vec, 1);
177 }
178
179 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vcombine_p64(<1 x i64> %low, <1 x i64> %high) #1 {
180 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x i64> %low, <1 x i64> %high, <2 x i32> <i32 0, i32 1>
181 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Joins two poly64x1_t halves into a poly64x2_t via vcombine_p64. The
// directives above expect a `shufflevector` of %low and %high with mask
// <i32 0, i32 1>.
test_vcombine_p64(poly64x1_t low,poly64x1_t high)182 poly64x2_t test_vcombine_p64(poly64x1_t low, poly64x1_t high) {
183 return vcombine_p64(low, high);
184 }
185
186 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vld1_p64(i64* %ptr) #0 {
187 // CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
188 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
189 // CHECK: [[TMP2:%.*]] = load <1 x i64>, <1 x i64>* [[TMP1]]
190 // CHECK: ret <1 x i64> [[TMP2]]
// Loads a poly64x1_t from `ptr` via vld1_p64. The directives above expect the
// i64* to be cast through i8* to <1 x i64>* and read with a plain `load`.
test_vld1_p64(poly64_t const * ptr)191 poly64x1_t test_vld1_p64(poly64_t const * ptr) {
192 return vld1_p64(ptr);
193 }
194
195 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vld1q_p64(i64* %ptr) #1 {
196 // CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
197 // CHECK: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
198 // CHECK: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]]
199 // CHECK: ret <2 x i64> [[TMP2]]
// Loads a poly64x2_t from `ptr` via vld1q_p64. The directives above expect the
// i64* to be cast through i8* to <2 x i64>* and read with a plain `load`.
test_vld1q_p64(poly64_t const * ptr)200 poly64x2_t test_vld1q_p64(poly64_t const * ptr) {
201 return vld1q_p64(ptr);
202 }
203
204 // CHECK-LABEL: define{{.*}} void @test_vst1_p64(i64* %ptr, <1 x i64> %val) #0 {
205 // CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
206 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %val to <8 x i8>
207 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <1 x i64>*
208 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
209 // CHECK: store <1 x i64> [[TMP3]], <1 x i64>* [[TMP2]]
210 // CHECK: ret void
// Stores a poly64x1_t to `ptr` via vst1_p64. The directives above expect the
// pointer to be cast through i8* to <1 x i64>*, the value to round-trip through
// <8 x i8>, and a plain `store` followed by `ret void`.
test_vst1_p64(poly64_t * ptr,poly64x1_t val)211 void test_vst1_p64(poly64_t * ptr, poly64x1_t val) {
212 return vst1_p64(ptr, val);
213 }
214
215 // CHECK-LABEL: define{{.*}} void @test_vst1q_p64(i64* %ptr, <2 x i64> %val) #1 {
216 // CHECK: [[TMP0:%.*]] = bitcast i64* %ptr to i8*
217 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %val to <16 x i8>
218 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP0]] to <2 x i64>*
219 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
220 // CHECK: store <2 x i64> [[TMP3]], <2 x i64>* [[TMP2]]
221 // CHECK: ret void
// Stores a poly64x2_t to `ptr` via vst1q_p64. The directives above expect the
// pointer to be cast through i8* to <2 x i64>*, the value to round-trip through
// <16 x i8>, and a plain `store` followed by `ret void`.
test_vst1q_p64(poly64_t * ptr,poly64x2_t val)222 void test_vst1q_p64(poly64_t * ptr, poly64x2_t val) {
223 return vst1q_p64(ptr, val);
224 }
225
226 // CHECK-LABEL: define{{.*}} %struct.poly64x1x2_t @test_vld2_p64(i64* %ptr) #2 {
227 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
228 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x2_t, align 8
229 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
230 // CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
231 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
232 // CHECK: [[VLD2:%.*]] = call { <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld2.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
233 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64> }*
234 // CHECK: store { <1 x i64>, <1 x i64> } [[VLD2]], { <1 x i64>, <1 x i64> }* [[TMP3]]
235 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x2_t* [[RETVAL]] to i8*
236 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x2_t* [[__RET]] to i8*
237 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 16, i1 false)
238 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[RETVAL]], align 8
239 // CHECK: ret %struct.poly64x1x2_t [[TMP6]]
// Loads a poly64x1x2_t from `ptr` via vld2_p64. The directives above expect a
// call to @llvm.aarch64.neon.ld2.v1i64.p0v1i64, a store of its result into a
// temporary, a 16-byte memcpy into the return slot, and a struct load/return.
test_vld2_p64(poly64_t const * ptr)240 poly64x1x2_t test_vld2_p64(poly64_t const * ptr) {
241 return vld2_p64(ptr);
242 }
243
244 // CHECK-LABEL: define{{.*}} %struct.poly64x2x2_t @test_vld2q_p64(i64* %ptr) #2 {
245 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
246 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x2_t, align 16
247 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
248 // CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
249 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
250 // CHECK: [[VLD2:%.*]] = call { <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld2.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
251 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64> }*
252 // CHECK: store { <2 x i64>, <2 x i64> } [[VLD2]], { <2 x i64>, <2 x i64> }* [[TMP3]]
253 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x2_t* [[RETVAL]] to i8*
254 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x2_t* [[__RET]] to i8*
255 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 32, i1 false)
256 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[RETVAL]], align 16
257 // CHECK: ret %struct.poly64x2x2_t [[TMP6]]
// Loads a poly64x2x2_t from `ptr` via vld2q_p64. The directives above expect a
// call to @llvm.aarch64.neon.ld2.v2i64.p0v2i64, a store of its result into a
// temporary, a 32-byte memcpy into the return slot, and a struct load/return.
test_vld2q_p64(poly64_t const * ptr)258 poly64x2x2_t test_vld2q_p64(poly64_t const * ptr) {
259 return vld2q_p64(ptr);
260 }
261
262 // CHECK-LABEL: define{{.*}} %struct.poly64x1x3_t @test_vld3_p64(i64* %ptr) #2 {
263 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
264 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x3_t, align 8
265 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
266 // CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
267 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
268 // CHECK: [[VLD3:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld3.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
269 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64> }*
270 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64> } [[VLD3]], { <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
271 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x3_t* [[RETVAL]] to i8*
272 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x3_t* [[__RET]] to i8*
273 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 24, i1 false)
274 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[RETVAL]], align 8
275 // CHECK: ret %struct.poly64x1x3_t [[TMP6]]
// Loads a poly64x1x3_t from `ptr` via vld3_p64. The directives above expect a
// call to @llvm.aarch64.neon.ld3.v1i64.p0v1i64, a store of its result into a
// temporary, a 24-byte memcpy into the return slot, and a struct load/return.
test_vld3_p64(poly64_t const * ptr)276 poly64x1x3_t test_vld3_p64(poly64_t const * ptr) {
277 return vld3_p64(ptr);
278 }
279
280 // CHECK-LABEL: define{{.*}} %struct.poly64x2x3_t @test_vld3q_p64(i64* %ptr) #2 {
281 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
282 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x3_t, align 16
283 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
284 // CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
285 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
286 // CHECK: [[VLD3:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld3.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
287 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64> }*
288 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64> } [[VLD3]], { <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
289 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x3_t* [[RETVAL]] to i8*
290 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x3_t* [[__RET]] to i8*
291 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 48, i1 false)
292 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[RETVAL]], align 16
293 // CHECK: ret %struct.poly64x2x3_t [[TMP6]]
// Loads a poly64x2x3_t from `ptr` via vld3q_p64. The directives above expect a
// call to @llvm.aarch64.neon.ld3.v2i64.p0v2i64, a store of its result into a
// temporary, a 48-byte memcpy into the return slot, and a struct load/return.
test_vld3q_p64(poly64_t const * ptr)294 poly64x2x3_t test_vld3q_p64(poly64_t const * ptr) {
295 return vld3q_p64(ptr);
296 }
297
298 // CHECK-LABEL: define{{.*}} %struct.poly64x1x4_t @test_vld4_p64(i64* %ptr) #2 {
299 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
300 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x1x4_t, align 8
301 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
302 // CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
303 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <1 x i64>*
304 // CHECK: [[VLD4:%.*]] = call { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } @llvm.aarch64.neon.ld4.v1i64.p0v1i64(<1 x i64>* [[TMP2]])
305 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }*
306 // CHECK: store { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> } [[VLD4]], { <1 x i64>, <1 x i64>, <1 x i64>, <1 x i64> }* [[TMP3]]
307 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x1x4_t* [[RETVAL]] to i8*
308 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x1x4_t* [[__RET]] to i8*
309 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP4]], i8* align 8 [[TMP5]], i64 32, i1 false)
310 // CHECK: [[TMP6:%.*]] = load %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[RETVAL]], align 8
311 // CHECK: ret %struct.poly64x1x4_t [[TMP6]]
// Loads a poly64x1x4_t from `ptr` via vld4_p64. The directives above expect a
// call to @llvm.aarch64.neon.ld4.v1i64.p0v1i64, a store of its result into a
// temporary, a 32-byte memcpy into the return slot, and a struct load/return.
test_vld4_p64(poly64_t const * ptr)312 poly64x1x4_t test_vld4_p64(poly64_t const * ptr) {
313 return vld4_p64(ptr);
314 }
315
316 // CHECK-LABEL: define{{.*}} %struct.poly64x2x4_t @test_vld4q_p64(i64* %ptr) #2 {
317 // CHECK: [[RETVAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
318 // CHECK: [[__RET:%.*]] = alloca %struct.poly64x2x4_t, align 16
319 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
320 // CHECK: [[TMP1:%.*]] = bitcast i64* %ptr to i8*
321 // CHECK: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <2 x i64>*
322 // CHECK: [[VLD4:%.*]] = call { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } @llvm.aarch64.neon.ld4.v2i64.p0v2i64(<2 x i64>* [[TMP2]])
323 // CHECK: [[TMP3:%.*]] = bitcast i8* [[TMP0]] to { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }*
324 // CHECK: store { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } [[VLD4]], { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> }* [[TMP3]]
325 // CHECK: [[TMP4:%.*]] = bitcast %struct.poly64x2x4_t* [[RETVAL]] to i8*
326 // CHECK: [[TMP5:%.*]] = bitcast %struct.poly64x2x4_t* [[__RET]] to i8*
327 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP4]], i8* align 16 [[TMP5]], i64 64, i1 false)
328 // CHECK: [[TMP6:%.*]] = load %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[RETVAL]], align 16
329 // CHECK: ret %struct.poly64x2x4_t [[TMP6]]
// Loads a poly64x2x4_t from `ptr` via vld4q_p64. The directives above expect a
// call to @llvm.aarch64.neon.ld4.v2i64.p0v2i64, a store of its result into a
// temporary, a 64-byte memcpy into the return slot, and a struct load/return.
test_vld4q_p64(poly64_t const * ptr)330 poly64x2x4_t test_vld4q_p64(poly64_t const * ptr) {
331 return vld4q_p64(ptr);
332 }
333
334 // CHECK-LABEL: define{{.*}} void @test_vst2_p64(i64* %ptr, [2 x <1 x i64>] %val.coerce) #2 {
335 // CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x2_t, align 8
336 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x2_t, align 8
337 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[VAL]], i32 0, i32 0
338 // CHECK: store [2 x <1 x i64>] [[VAL]].coerce, [2 x <1 x i64>]* [[COERCE_DIVE]], align 8
339 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x2_t* [[__S1]] to i8*
340 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x2_t* [[VAL]] to i8*
341 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 16, i1 false)
342 // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
343 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
344 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL1]], i64 0, i64 0
345 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
346 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
347 // CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x2_t, %struct.poly64x1x2_t* [[__S1]], i32 0, i32 0
348 // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <1 x i64>], [2 x <1 x i64>]* [[VAL2]], i64 0, i64 1
349 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
350 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
351 // CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
352 // CHECK: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
353 // CHECK: call void @llvm.aarch64.neon.st2.v1i64.p0i8(<1 x i64> [[TMP7]], <1 x i64> [[TMP8]], i8* [[TMP2]])
354 // CHECK: ret void
// Stores a poly64x1x2_t to `ptr` via vst2_p64. The directives above expect the
// argument to arrive coerced as [2 x <1 x i64>], be copied (16 bytes) into a
// local, have both elements loaded, and be passed to
// @llvm.aarch64.neon.st2.v1i64.p0i8 together with the i8* destination.
test_vst2_p64(poly64_t * ptr,poly64x1x2_t val)355 void test_vst2_p64(poly64_t * ptr, poly64x1x2_t val) {
356 return vst2_p64(ptr, val);
357 }
358
359 // CHECK-LABEL: define{{.*}} void @test_vst2q_p64(i64* %ptr, [2 x <2 x i64>] %val.coerce) #2 {
360 // CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x2_t, align 16
361 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x2_t, align 16
362 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[VAL]], i32 0, i32 0
363 // CHECK: store [2 x <2 x i64>] [[VAL]].coerce, [2 x <2 x i64>]* [[COERCE_DIVE]], align 16
364 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x2_t* [[__S1]] to i8*
365 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x2_t* [[VAL]] to i8*
366 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 32, i1 false)
367 // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
368 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
369 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL1]], i64 0, i64 0
370 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
371 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
372 // CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x2_t, %struct.poly64x2x2_t* [[__S1]], i32 0, i32 0
373 // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [2 x <2 x i64>], [2 x <2 x i64>]* [[VAL2]], i64 0, i64 1
374 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
375 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
376 // CHECK: [[TMP7:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
377 // CHECK: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
378 // CHECK: call void @llvm.aarch64.neon.st2.v2i64.p0i8(<2 x i64> [[TMP7]], <2 x i64> [[TMP8]], i8* [[TMP2]])
379 // CHECK: ret void
// Stores a poly64x2x2_t to `ptr` via vst2q_p64. The directives above expect the
// argument to arrive coerced as [2 x <2 x i64>], be copied (32 bytes) into a
// local, have both elements loaded, and be passed to
// @llvm.aarch64.neon.st2.v2i64.p0i8 together with the i8* destination.
test_vst2q_p64(poly64_t * ptr,poly64x2x2_t val)380 void test_vst2q_p64(poly64_t * ptr, poly64x2x2_t val) {
381 return vst2q_p64(ptr, val);
382 }
383
384 // CHECK-LABEL: define{{.*}} void @test_vst3_p64(i64* %ptr, [3 x <1 x i64>] %val.coerce) #2 {
385 // CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x3_t, align 8
386 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x3_t, align 8
387 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[VAL]], i32 0, i32 0
388 // CHECK: store [3 x <1 x i64>] [[VAL]].coerce, [3 x <1 x i64>]* [[COERCE_DIVE]], align 8
389 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x3_t* [[__S1]] to i8*
390 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x3_t* [[VAL]] to i8*
391 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 24, i1 false)
392 // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
393 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
394 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL1]], i64 0, i64 0
395 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
396 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
397 // CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
398 // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL2]], i64 0, i64 1
399 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
400 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
401 // CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x3_t, %struct.poly64x1x3_t* [[__S1]], i32 0, i32 0
402 // CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <1 x i64>], [3 x <1 x i64>]* [[VAL4]], i64 0, i64 2
403 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
404 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
405 // CHECK: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
406 // CHECK: [[TMP10:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
407 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
408 // CHECK: call void @llvm.aarch64.neon.st3.v1i64.p0i8(<1 x i64> [[TMP9]], <1 x i64> [[TMP10]], <1 x i64> [[TMP11]], i8* [[TMP2]])
409 // CHECK: ret void
// Stores a poly64x1x3_t to `ptr` via vst3_p64. The directives above expect the
// argument to arrive coerced as [3 x <1 x i64>], be copied (24 bytes) into a
// local, have all three elements loaded, and be passed to
// @llvm.aarch64.neon.st3.v1i64.p0i8 together with the i8* destination.
test_vst3_p64(poly64_t * ptr,poly64x1x3_t val)410 void test_vst3_p64(poly64_t * ptr, poly64x1x3_t val) {
411 return vst3_p64(ptr, val);
412 }
413
414 // CHECK-LABEL: define{{.*}} void @test_vst3q_p64(i64* %ptr, [3 x <2 x i64>] %val.coerce) #2 {
415 // CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x3_t, align 16
416 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x3_t, align 16
417 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[VAL]], i32 0, i32 0
418 // CHECK: store [3 x <2 x i64>] [[VAL]].coerce, [3 x <2 x i64>]* [[COERCE_DIVE]], align 16
419 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x3_t* [[__S1]] to i8*
420 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x3_t* [[VAL]] to i8*
421 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 48, i1 false)
422 // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
423 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
424 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL1]], i64 0, i64 0
425 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
426 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
427 // CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
428 // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL2]], i64 0, i64 1
429 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
430 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
431 // CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x3_t, %struct.poly64x2x3_t* [[__S1]], i32 0, i32 0
432 // CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [3 x <2 x i64>], [3 x <2 x i64>]* [[VAL4]], i64 0, i64 2
433 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
434 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
435 // CHECK: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
436 // CHECK: [[TMP10:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
437 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
438 // CHECK: call void @llvm.aarch64.neon.st3.v2i64.p0i8(<2 x i64> [[TMP9]], <2 x i64> [[TMP10]], <2 x i64> [[TMP11]], i8* [[TMP2]])
439 // CHECK: ret void
// Stores a poly64x2x3_t to `ptr` via vst3q_p64. The directives above expect the
// argument to arrive coerced as [3 x <2 x i64>], be copied (48 bytes) into a
// local, have all three elements loaded, and be passed to
// @llvm.aarch64.neon.st3.v2i64.p0i8 together with the i8* destination.
test_vst3q_p64(poly64_t * ptr,poly64x2x3_t val)440 void test_vst3q_p64(poly64_t * ptr, poly64x2x3_t val) {
441 return vst3q_p64(ptr, val);
442 }
443
444 // CHECK-LABEL: define{{.*}} void @test_vst4_p64(i64* %ptr, [4 x <1 x i64>] %val.coerce) #2 {
445 // CHECK: [[VAL:%.*]] = alloca %struct.poly64x1x4_t, align 8
446 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x1x4_t, align 8
447 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[VAL]], i32 0, i32 0
448 // CHECK: store [4 x <1 x i64>] [[VAL]].coerce, [4 x <1 x i64>]* [[COERCE_DIVE]], align 8
449 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x1x4_t* [[__S1]] to i8*
450 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x1x4_t* [[VAL]] to i8*
451 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP0]], i8* align 8 [[TMP1]], i64 32, i1 false)
452 // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
453 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
454 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL1]], i64 0, i64 0
455 // CHECK: [[TMP3:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX]], align 8
456 // CHECK: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP3]] to <8 x i8>
457 // CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
458 // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL2]], i64 0, i64 1
459 // CHECK: [[TMP5:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX3]], align 8
460 // CHECK: [[TMP6:%.*]] = bitcast <1 x i64> [[TMP5]] to <8 x i8>
461 // CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
462 // CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL4]], i64 0, i64 2
463 // CHECK: [[TMP7:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX5]], align 8
464 // CHECK: [[TMP8:%.*]] = bitcast <1 x i64> [[TMP7]] to <8 x i8>
465 // CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x1x4_t, %struct.poly64x1x4_t* [[__S1]], i32 0, i32 0
466 // CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <1 x i64>], [4 x <1 x i64>]* [[VAL6]], i64 0, i64 3
467 // CHECK: [[TMP9:%.*]] = load <1 x i64>, <1 x i64>* [[ARRAYIDX7]], align 8
468 // CHECK: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP9]] to <8 x i8>
469 // CHECK: [[TMP11:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x i64>
470 // CHECK: [[TMP12:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x i64>
471 // CHECK: [[TMP13:%.*]] = bitcast <8 x i8> [[TMP8]] to <1 x i64>
472 // CHECK: [[TMP14:%.*]] = bitcast <8 x i8> [[TMP10]] to <1 x i64>
473 // CHECK: call void @llvm.aarch64.neon.st4.v1i64.p0i8(<1 x i64> [[TMP11]], <1 x i64> [[TMP12]], <1 x i64> [[TMP13]], <1 x i64> [[TMP14]], i8* [[TMP2]])
474 // CHECK: ret void
// Stores a poly64x1x4_t to `ptr` via vst4_p64. The directives above expect the
// argument to arrive coerced as [4 x <1 x i64>], be copied (32 bytes) into a
// local, have all four elements loaded, and be passed to
// @llvm.aarch64.neon.st4.v1i64.p0i8 together with the i8* destination.
test_vst4_p64(poly64_t * ptr,poly64x1x4_t val)475 void test_vst4_p64(poly64_t * ptr, poly64x1x4_t val) {
476 return vst4_p64(ptr, val);
477 }
478
479 // CHECK-LABEL: define{{.*}} void @test_vst4q_p64(i64* %ptr, [4 x <2 x i64>] %val.coerce) #2 {
480 // CHECK: [[VAL:%.*]] = alloca %struct.poly64x2x4_t, align 16
481 // CHECK: [[__S1:%.*]] = alloca %struct.poly64x2x4_t, align 16
482 // CHECK: [[COERCE_DIVE:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[VAL]], i32 0, i32 0
483 // CHECK: store [4 x <2 x i64>] [[VAL]].coerce, [4 x <2 x i64>]* [[COERCE_DIVE]], align 16
484 // CHECK: [[TMP0:%.*]] = bitcast %struct.poly64x2x4_t* [[__S1]] to i8*
485 // CHECK: [[TMP1:%.*]] = bitcast %struct.poly64x2x4_t* [[VAL]] to i8*
486 // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP0]], i8* align 16 [[TMP1]], i64 64, i1 false)
487 // CHECK: [[TMP2:%.*]] = bitcast i64* %ptr to i8*
488 // CHECK: [[VAL1:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
489 // CHECK: [[ARRAYIDX:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL1]], i64 0, i64 0
490 // CHECK: [[TMP3:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX]], align 16
491 // CHECK: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP3]] to <16 x i8>
492 // CHECK: [[VAL2:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
493 // CHECK: [[ARRAYIDX3:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL2]], i64 0, i64 1
494 // CHECK: [[TMP5:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX3]], align 16
495 // CHECK: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP5]] to <16 x i8>
496 // CHECK: [[VAL4:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
497 // CHECK: [[ARRAYIDX5:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL4]], i64 0, i64 2
498 // CHECK: [[TMP7:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX5]], align 16
499 // CHECK: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP7]] to <16 x i8>
500 // CHECK: [[VAL6:%.*]] = getelementptr inbounds %struct.poly64x2x4_t, %struct.poly64x2x4_t* [[__S1]], i32 0, i32 0
501 // CHECK: [[ARRAYIDX7:%.*]] = getelementptr inbounds [4 x <2 x i64>], [4 x <2 x i64>]* [[VAL6]], i64 0, i64 3
502 // CHECK: [[TMP9:%.*]] = load <2 x i64>, <2 x i64>* [[ARRAYIDX7]], align 16
503 // CHECK: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP9]] to <16 x i8>
504 // CHECK: [[TMP11:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x i64>
505 // CHECK: [[TMP12:%.*]] = bitcast <16 x i8> [[TMP6]] to <2 x i64>
506 // CHECK: [[TMP13:%.*]] = bitcast <16 x i8> [[TMP8]] to <2 x i64>
507 // CHECK: [[TMP14:%.*]] = bitcast <16 x i8> [[TMP10]] to <2 x i64>
508 // CHECK: call void @llvm.aarch64.neon.st4.v2i64.p0i8(<2 x i64> [[TMP11]], <2 x i64> [[TMP12]], <2 x i64> [[TMP13]], <2 x i64> [[TMP14]], i8* [[TMP2]])
509 // CHECK: ret void
// Exercises vst4q_p64 on a poly64x2x4_t. The expectations above require a call
// to @llvm.aarch64.neon.st4.v2i64.p0i8 followed by `ret void`.
void test_vst4q_p64(poly64_t * ptr, poly64x2x4_t val) {
  // The intrinsic yields no value, so invoke it as a plain statement: C does
  // not allow `return <expr>;` in a function returning void (C11 6.8.6.4p1).
  vst4q_p64(ptr, val);
}
513
514 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vext_p64(<1 x i64> %a, <1 x i64> %b) #0 {
515 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
516 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
517 // CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
518 // CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
519 // CHECK: [[VEXT:%.*]] = shufflevector <1 x i64> [[TMP2]], <1 x i64> [[TMP3]], <1 x i32> zeroinitializer
520 // CHECK: ret <1 x i64> [[VEXT]]
// Exercises vext_p64 with lane index 0. The expectations above require a
// <1 x i64> shufflevector with a zeroinitializer mask.
poly64x1_t test_vext_p64(poly64x1_t a, poly64x1_t b) {
  // Call the poly64 variant directly so this test covers vext_p64 itself
  // instead of going through vext_u64 with implicit vector conversions.
  return vext_p64(a, b, 0);
}
525
526 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vextq_p64(<2 x i64> %a, <2 x i64> %b) #1 {
527 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
528 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
529 // CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
530 // CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
531 // CHECK: [[VEXT:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> [[TMP3]], <2 x i32> <i32 1, i32 2>
532 // CHECK: ret <2 x i64> [[VEXT]]
// Exercises vextq_p64 with lane index 1. The expectations above require a
// <2 x i64> shufflevector with mask <1, 2>.
poly64x2_t test_vextq_p64(poly64x2_t a, poly64x2_t b) {
  poly64x2_t extracted = vextq_p64(a, b, 1);
  return extracted;
}
536
537 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vzip1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
538 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
539 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Exercises vzip1q_p64. The expectations above require a <2 x i64>
// shufflevector of %a and %b with mask <0, 2>.
poly64x2_t test_vzip1q_p64(poly64x2_t a, poly64x2_t b) {
  poly64x2_t zipped = vzip1q_p64(a, b);
  return zipped;
}
543
544 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vzip2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
545 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
546 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Exercises vzip2q_p64. The expectations above require a <2 x i64>
// shufflevector of %a and %b with mask <1, 3>.
poly64x2_t test_vzip2q_p64(poly64x2_t a, poly64x2_t b) {
  // Call the poly64 variant so this test covers vzip2q_p64 itself instead of
  // going through vzip2q_u64 with implicit vector conversions.
  return vzip2q_p64(a, b);
}
550
551 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vuzp1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
552 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
553 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Exercises vuzp1q_p64. The expectations above require a <2 x i64>
// shufflevector of %a and %b with mask <0, 2>.
poly64x2_t test_vuzp1q_p64(poly64x2_t a, poly64x2_t b) {
  poly64x2_t unzipped = vuzp1q_p64(a, b);
  return unzipped;
}
557
558 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vuzp2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
559 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
560 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Exercises vuzp2q_p64. The expectations above require a <2 x i64>
// shufflevector of %a and %b with mask <1, 3>.
poly64x2_t test_vuzp2q_p64(poly64x2_t a, poly64x2_t b) {
  // Call the poly64 variant so this test covers vuzp2q_p64 itself instead of
  // going through vuzp2q_u64 with implicit vector conversions.
  return vuzp2q_p64(a, b);
}
564
565 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vtrn1q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
566 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 0, i32 2>
567 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Exercises vtrn1q_p64. The expectations above require a <2 x i64>
// shufflevector of %a and %b with mask <0, 2>.
poly64x2_t test_vtrn1q_p64(poly64x2_t a, poly64x2_t b) {
  poly64x2_t transposed = vtrn1q_p64(a, b);
  return transposed;
}
571
572 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vtrn2q_p64(<2 x i64> %a, <2 x i64> %b) #1 {
573 // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 3>
574 // CHECK: ret <2 x i64> [[SHUFFLE_I]]
// Exercises vtrn2q_p64. The expectations above require a <2 x i64>
// shufflevector of %a and %b with mask <1, 3>.
poly64x2_t test_vtrn2q_p64(poly64x2_t a, poly64x2_t b) {
  // Call the poly64 variant so this test covers vtrn2q_p64 itself instead of
  // going through vtrn2q_u64 with implicit vector conversions.
  return vtrn2q_p64(a, b);
}
578
579 // CHECK-LABEL: define{{.*}} <1 x i64> @test_vsri_n_p64(<1 x i64> %a, <1 x i64> %b) #0 {
580 // CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8>
581 // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %b to <8 x i8>
582 // CHECK: [[VSRI_N:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
583 // CHECK: [[VSRI_N1:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
584 // CHECK: [[VSRI_N2:%.*]] = call <1 x i64> @llvm.aarch64.neon.vsri.v1i64(<1 x i64> [[VSRI_N]], <1 x i64> [[VSRI_N1]], i32 33)
585 // CHECK: ret <1 x i64> [[VSRI_N2]]
// Exercises vsri_n_p64 with an immediate of 33. The expectations above require
// a call to @llvm.aarch64.neon.vsri.v1i64 whose last operand is i32 33.
poly64x1_t test_vsri_n_p64(poly64x1_t a, poly64x1_t b) {
  poly64x1_t inserted = vsri_n_p64(a, b, 33);
  return inserted;
}
589
590 // CHECK-LABEL: define{{.*}} <2 x i64> @test_vsriq_n_p64(<2 x i64> %a, <2 x i64> %b) #1 {
591 // CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8>
592 // CHECK: [[TMP1:%.*]] = bitcast <2 x i64> %b to <16 x i8>
593 // CHECK: [[VSRI_N:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64>
594 // CHECK: [[VSRI_N1:%.*]] = bitcast <16 x i8> [[TMP1]] to <2 x i64>
595 // CHECK: [[VSRI_N2:%.*]] = call <2 x i64> @llvm.aarch64.neon.vsri.v2i64(<2 x i64> [[VSRI_N]], <2 x i64> [[VSRI_N1]], i32 64)
596 // CHECK: ret <2 x i64> [[VSRI_N2]]
// Exercises vsriq_n_p64 with an immediate of 64. The expectations above require
// a call to @llvm.aarch64.neon.vsri.v2i64 whose last operand is i32 64.
poly64x2_t test_vsriq_n_p64(poly64x2_t a, poly64x2_t b) {
  poly64x2_t inserted = vsriq_n_p64(a, b, 64);
  return inserted;
}
600
601 // CHECK: attributes #0 ={{.*}}"min-legal-vector-width"="64"
602 // CHECK: attributes #1 ={{.*}}"min-legal-vector-width"="128"
603 // CHECK: attributes #2 ={{.*}}"min-legal-vector-width"="0"
604