1 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
3 // RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
4 
5 #include <arm_mve.h>
6 
7 // CHECK-LABEL: @test_vhaddq_u8(
8 // CHECK-NEXT:  entry:
9 // CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vhadd.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1)
10 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
11 //
uint8x16_t test_vhaddq_u8(uint8x16_t a, uint8x16_t b)
{
    /* Calls vhaddq_u8 on a and b and returns the result. When POLYMORPHIC is
     * defined the overloaded name vhaddq is used instead; the assertions
     * preceding this function apply to both builds. */
#ifdef POLYMORPHIC
    return vhaddq(a, b);
#else /* POLYMORPHIC */
    return vhaddq_u8(a, b);
#endif /* POLYMORPHIC */
}
20 
21 // CHECK-LABEL: @test_vhaddq_s16(
22 // CHECK-NEXT:  entry:
23 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vhadd.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0)
24 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
25 //
int16x8_t test_vhaddq_s16(int16x8_t a, int16x8_t b)
{
    /* Calls vhaddq_s16 on a and b and returns the result. When POLYMORPHIC is
     * defined the overloaded name vhaddq is used instead; the assertions
     * preceding this function apply to both builds. */
#ifdef POLYMORPHIC
    return vhaddq(a, b);
#else /* POLYMORPHIC */
    return vhaddq_s16(a, b);
#endif /* POLYMORPHIC */
}
34 
35 // CHECK-LABEL: @test_vhaddq_u32(
36 // CHECK-NEXT:  entry:
37 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vhadd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1)
38 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
39 //
uint32x4_t test_vhaddq_u32(uint32x4_t a, uint32x4_t b)
{
    /* Calls vhaddq_u32 on a and b and returns the result. When POLYMORPHIC is
     * defined the overloaded name vhaddq is used instead; the assertions
     * preceding this function apply to both builds. */
#ifdef POLYMORPHIC
    return vhaddq(a, b);
#else /* POLYMORPHIC */
    return vhaddq_u32(a, b);
#endif /* POLYMORPHIC */
}
48 
49 // CHECK-LABEL: @test_vhaddq_m_s8(
50 // CHECK-NEXT:  entry:
51 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
52 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
53 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
54 // CHECK-NEXT:    ret <16 x i8> [[TMP2]]
55 //
int8x16_t test_vhaddq_m_s8(int8x16_t inactive, int8x16_t a, int8x16_t b, mve_pred16_t p)
{
    /* Forwards inactive, a, b and the predicate p to vhaddq_m_s8 and returns
     * the result. When POLYMORPHIC is defined the overloaded name vhaddq_m is
     * used instead; the assertions preceding this function apply to both
     * builds. */
#ifdef POLYMORPHIC
    return vhaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_m_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
64 
65 // CHECK-LABEL: @test_vhaddq_m_u16(
66 // CHECK-NEXT:  entry:
67 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
68 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
69 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
70 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
71 //
uint16x8_t test_vhaddq_m_u16(uint16x8_t inactive, uint16x8_t a, uint16x8_t b, mve_pred16_t p)
{
    /* Forwards inactive, a, b and the predicate p to vhaddq_m_u16 and returns
     * the result. When POLYMORPHIC is defined the overloaded name vhaddq_m is
     * used instead; the assertions preceding this function apply to both
     * builds. */
#ifdef POLYMORPHIC
    return vhaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_m_u16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
80 
81 // CHECK-LABEL: @test_vhaddq_m_s32(
82 // CHECK-NEXT:  entry:
83 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
84 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
85 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
86 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
87 //
int32x4_t test_vhaddq_m_s32(int32x4_t inactive, int32x4_t a, int32x4_t b, mve_pred16_t p)
{
    /* Forwards inactive, a, b and the predicate p to vhaddq_m_s32 and returns
     * the result. When POLYMORPHIC is defined the overloaded name vhaddq_m is
     * used instead; the assertions preceding this function apply to both
     * builds. */
#ifdef POLYMORPHIC
    return vhaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_m_s32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
96 
97 // CHECK-LABEL: @test_vhaddq_x_u8(
98 // CHECK-NEXT:  entry:
99 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
100 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
101 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
102 // CHECK-NEXT:    ret <16 x i8> [[TMP2]]
103 //
uint8x16_t test_vhaddq_x_u8(uint8x16_t a, uint8x16_t b, mve_pred16_t p)
{
    /* Forwards a, b and the predicate p to vhaddq_x_u8 and returns the result.
     * When POLYMORPHIC is defined the overloaded name vhaddq_x is used
     * instead; the assertions preceding this function apply to both builds
     * and expect undef as the final operand of the predicated intrinsic. */
#ifdef POLYMORPHIC
    return vhaddq_x(a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_x_u8(a, b, p);
#endif /* POLYMORPHIC */
}
112 
113 // CHECK-LABEL: @test_vhaddq_x_s16(
114 // CHECK-NEXT:  entry:
115 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
116 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
117 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
118 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
119 //
int16x8_t test_vhaddq_x_s16(int16x8_t a, int16x8_t b, mve_pred16_t p)
{
    /* Forwards a, b and the predicate p to vhaddq_x_s16 and returns the
     * result. When POLYMORPHIC is defined the overloaded name vhaddq_x is used
     * instead; the assertions preceding this function apply to both builds
     * and expect undef as the final operand of the predicated intrinsic. */
#ifdef POLYMORPHIC
    return vhaddq_x(a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_x_s16(a, b, p);
#endif /* POLYMORPHIC */
}
128 
129 // CHECK-LABEL: @test_vhaddq_x_u32(
130 // CHECK-NEXT:  entry:
131 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
132 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
133 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
134 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
135 //
uint32x4_t test_vhaddq_x_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p)
{
    /* Forwards a, b and the predicate p to vhaddq_x_u32 and returns the
     * result. When POLYMORPHIC is defined the overloaded name vhaddq_x is used
     * instead; the assertions preceding this function apply to both builds
     * and expect undef as the final operand of the predicated intrinsic. */
#ifdef POLYMORPHIC
    return vhaddq_x(a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_x_u32(a, b, p);
#endif /* POLYMORPHIC */
}
144 
145 // CHECK-LABEL: @test_vhaddq_n_u8(
146 // CHECK-NEXT:  entry:
147 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
148 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
149 // CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vhadd.v16i8(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1)
150 // CHECK-NEXT:    ret <16 x i8> [[TMP0]]
151 //
uint8x16_t test_vhaddq_n_u8(uint8x16_t a, uint8_t b)
{
    /* Calls vhaddq_n_u8 with the vector a and the scalar b and returns the
     * result. When POLYMORPHIC is defined the overloaded name vhaddq is used
     * instead; the assertions preceding this function apply to both builds
     * and expect b to be splatted into a vector before the intrinsic call. */
#ifdef POLYMORPHIC
    return vhaddq(a, b);
#else /* POLYMORPHIC */
    return vhaddq_n_u8(a, b);
#endif /* POLYMORPHIC */
}
160 
161 // CHECK-LABEL: @test_vhaddq_n_s16(
162 // CHECK-NEXT:  entry:
163 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
164 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
165 // CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vhadd.v8i16(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0)
166 // CHECK-NEXT:    ret <8 x i16> [[TMP0]]
167 //
int16x8_t test_vhaddq_n_s16(int16x8_t a, int16_t b)
{
    /* Calls vhaddq_n_s16 with the vector a and the scalar b and returns the
     * result. When POLYMORPHIC is defined the overloaded name vhaddq is used
     * instead; the assertions preceding this function apply to both builds
     * and expect b to be splatted into a vector before the intrinsic call. */
#ifdef POLYMORPHIC
    return vhaddq(a, b);
#else /* POLYMORPHIC */
    return vhaddq_n_s16(a, b);
#endif /* POLYMORPHIC */
}
176 
177 // CHECK-LABEL: @test_vhaddq_n_u32(
178 // CHECK-NEXT:  entry:
179 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
180 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
181 // CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vhadd.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1)
182 // CHECK-NEXT:    ret <4 x i32> [[TMP0]]
183 //
uint32x4_t test_vhaddq_n_u32(uint32x4_t a, uint32_t b)
{
    /* Calls vhaddq_n_u32 with the vector a and the scalar b and returns the
     * result. When POLYMORPHIC is defined the overloaded name vhaddq is used
     * instead; the assertions preceding this function apply to both builds
     * and expect b to be splatted into a vector before the intrinsic call. */
#ifdef POLYMORPHIC
    return vhaddq(a, b);
#else /* POLYMORPHIC */
    return vhaddq_n_u32(a, b);
#endif /* POLYMORPHIC */
}
192 
193 // CHECK-LABEL: @test_vhaddq_m_n_s8(
194 // CHECK-NEXT:  entry:
195 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
196 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
197 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
198 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
199 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
200 // CHECK-NEXT:    ret <16 x i8> [[TMP2]]
201 //
int8x16_t test_vhaddq_m_n_s8(int8x16_t inactive, int8x16_t a, int8_t b, mve_pred16_t p)
{
    /* Forwards inactive, the vector a, the scalar b and the predicate p to
     * vhaddq_m_n_s8 and returns the result. When POLYMORPHIC is defined the
     * overloaded name vhaddq_m is used instead; the assertions preceding this
     * function apply to both builds and expect b to be splatted into a vector
     * before the predicated intrinsic call. */
#ifdef POLYMORPHIC
    return vhaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_m_n_s8(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
210 
211 // CHECK-LABEL: @test_vhaddq_m_n_u16(
212 // CHECK-NEXT:  entry:
213 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
214 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
215 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
216 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
217 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
218 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
219 //
uint16x8_t test_vhaddq_m_n_u16(uint16x8_t inactive, uint16x8_t a, uint16_t b, mve_pred16_t p)
{
    /* Forwards inactive, the vector a, the scalar b and the predicate p to
     * vhaddq_m_n_u16 and returns the result. When POLYMORPHIC is defined the
     * overloaded name vhaddq_m is used instead; the assertions preceding this
     * function apply to both builds and expect b to be splatted into a vector
     * before the predicated intrinsic call. */
#ifdef POLYMORPHIC
    return vhaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_m_n_u16(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
228 
229 // CHECK-LABEL: @test_vhaddq_m_n_s32(
230 // CHECK-NEXT:  entry:
231 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
232 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
233 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
234 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
235 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
236 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
237 //
int32x4_t test_vhaddq_m_n_s32(int32x4_t inactive, int32x4_t a, int32_t b, mve_pred16_t p)
{
    /* Forwards inactive, the vector a, the scalar b and the predicate p to
     * vhaddq_m_n_s32 and returns the result. When POLYMORPHIC is defined the
     * overloaded name vhaddq_m is used instead; the assertions preceding this
     * function apply to both builds and expect b to be splatted into a vector
     * before the predicated intrinsic call. */
#ifdef POLYMORPHIC
    return vhaddq_m(inactive, a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_m_n_s32(inactive, a, b, p);
#endif /* POLYMORPHIC */
}
246 
247 // CHECK-LABEL: @test_vhaddq_x_n_u8(
248 // CHECK-NEXT:  entry:
249 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <16 x i8> undef, i8 [[B:%.*]], i32 0
250 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <16 x i8> [[DOTSPLATINSERT]], <16 x i8> undef, <16 x i32> zeroinitializer
251 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
252 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
253 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.hadd.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i8> [[DOTSPLAT]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
254 // CHECK-NEXT:    ret <16 x i8> [[TMP2]]
255 //
uint8x16_t test_vhaddq_x_n_u8(uint8x16_t a, uint8_t b, mve_pred16_t p)
{
    /* Forwards the vector a, the scalar b and the predicate p to
     * vhaddq_x_n_u8 and returns the result. When POLYMORPHIC is defined the
     * overloaded name vhaddq_x is used instead; the assertions preceding this
     * function apply to both builds and expect b to be splatted into a vector
     * and undef as the final operand of the predicated intrinsic. */
#ifdef POLYMORPHIC
    return vhaddq_x(a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_x_n_u8(a, b, p);
#endif /* POLYMORPHIC */
}
264 
265 // CHECK-LABEL: @test_vhaddq_x_n_s16(
266 // CHECK-NEXT:  entry:
267 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <8 x i16> undef, i16 [[B:%.*]], i32 0
268 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <8 x i16> [[DOTSPLATINSERT]], <8 x i16> undef, <8 x i32> zeroinitializer
269 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
270 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
271 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.hadd.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i16> [[DOTSPLAT]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
272 // CHECK-NEXT:    ret <8 x i16> [[TMP2]]
273 //
int16x8_t test_vhaddq_x_n_s16(int16x8_t a, int16_t b, mve_pred16_t p)
{
    /* Forwards the vector a, the scalar b and the predicate p to
     * vhaddq_x_n_s16 and returns the result. When POLYMORPHIC is defined the
     * overloaded name vhaddq_x is used instead; the assertions preceding this
     * function apply to both builds and expect b to be splatted into a vector
     * and undef as the final operand of the predicated intrinsic. */
#ifdef POLYMORPHIC
    return vhaddq_x(a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_x_n_s16(a, b, p);
#endif /* POLYMORPHIC */
}
282 
283 // CHECK-LABEL: @test_vhaddq_x_n_u32(
284 // CHECK-NEXT:  entry:
285 // CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i32 0
286 // CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
287 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
288 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
289 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.hadd.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i32> [[DOTSPLAT]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
290 // CHECK-NEXT:    ret <4 x i32> [[TMP2]]
291 //
uint32x4_t test_vhaddq_x_n_u32(uint32x4_t a, uint32_t b, mve_pred16_t p)
{
    /* Forwards the vector a, the scalar b and the predicate p to
     * vhaddq_x_n_u32 and returns the result. When POLYMORPHIC is defined the
     * overloaded name vhaddq_x is used instead; the assertions preceding this
     * function apply to both builds and expect b to be splatted into a vector
     * and undef as the final operand of the predicated intrinsic. */
#ifdef POLYMORPHIC
    return vhaddq_x(a, b, p);
#else /* POLYMORPHIC */
    return vhaddq_x_n_u32(a, b, p);
#endif /* POLYMORPHIC */
}
300