// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg -sroa | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg -sroa | FileCheck %s

#include <arm_mve.h>
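
// The functions below test the MVE 64-bit multiply-accumulate reduction
// intrinsics (vmlaldav*, vmlsldav*, vrmlaldavh*, vrmlsldavh*). Each is
// compiled twice, once with the explicitly typed intrinsic name and once
// (under -DPOLYMORPHIC) with the overloaded spelling, and the autogenerated
// CHECK lines verify the lowering to llvm.arm.mve.vmlldava and
// llvm.arm.mve.vrmlldavha (plus their .predicated variants). Those IR
// intrinsics return the 64-bit accumulator as a { i32, i32 } pair, so the
// checks also cover the incoming i64 accumulator being split into halves
// and the result being reassembled with zext/shl/or. Judging by the
// variants below, the three leading i32 flag operands select unsigned
// inputs, the subtracting (vmls*) forms, and the exchanged (*x*) forms,
// respectively.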

// CHECK-LABEL: @test_vmlaldavaq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlaldavaq_s16(int64_t a, int16x8_t b, int16x8_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaq(a, b, c);
#else
  return vmlaldavaq_s16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlaldavaq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaq(a, b, c);
#else
  return vmlaldavaq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
uint64_t test_vmlaldavaq_u16(uint64_t a, uint16x8_t b, uint16x8_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaq(a, b, c);
#else
  return vmlaldavaq_u16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
uint64_t test_vmlaldavaq_u32(uint64_t a, uint32x4_t b, uint32x4_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaq(a, b, c);
#else
  return vmlaldavaq_u32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaxq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlaldavaxq_s16(int64_t a, int16x8_t b, int16x8_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaxq(a, b, c);
#else
  return vmlaldavaxq_s16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlaldavaxq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vmlaldavaxq(a, b, c);
#else
  return vmlaldavaxq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlsldavaq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlsldavaq_s16(int64_t a, int16x8_t b, int16x8_t c) {
#ifdef POLYMORPHIC
  return vmlsldavaq(a, b, c);
#else
  return vmlsldavaq_s16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlsldavaq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlsldavaq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vmlsldavaq(a, b, c);
#else
  return vmlsldavaq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlsldavaxq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlsldavaxq_s16(int64_t a, int16x8_t b, int16x8_t c) {
#ifdef POLYMORPHIC
  return vmlsldavaxq(a, b, c);
#else
  return vmlsldavaxq_s16(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlsldavaxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vmlsldavaxq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vmlsldavaxq(a, b, c);
#else
  return vmlsldavaxq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vrmlaldavhaq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlaldavhaq(a, b, c);
#else
  return vrmlaldavhaq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
uint64_t test_vrmlaldavhaq_u32(uint64_t a, uint32x4_t b, uint32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlaldavhaq(a, b, c);
#else
  return vrmlaldavhaq_u32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vrmlaldavhaxq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlaldavhaxq(a, b, c);
#else
  return vrmlaldavhaxq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhaq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vrmlsldavhaq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlsldavhaq(a, b, c);
#else
  return vrmlsldavhaq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhaxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]])
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP3]], 1
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[TMP5]], 32
// CHECK-NEXT:    [[TMP7:%.*]] = extractvalue { i32, i32 } [[TMP3]], 0
// CHECK-NEXT:    [[TMP8:%.*]] = zext i32 [[TMP7]] to i64
// CHECK-NEXT:    [[TMP9:%.*]] = or i64 [[TMP6]], [[TMP8]]
// CHECK-NEXT:    ret i64 [[TMP9]]
//
int64_t test_vrmlsldavhaxq_s32(int64_t a, int32x4_t b, int32x4_t c) {
#ifdef POLYMORPHIC
  return vrmlsldavhaxq(a, b, c);
#else
  return vrmlsldavhaxq_s32(a, b, c);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlaldavaq_p_s16(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaq_p(a, b, c, p);
#else
  return vmlaldavaq_p_s16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlaldavaq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaq_p(a, b, c, p);
#else
  return vmlaldavaq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_p_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
uint64_t test_vmlaldavaq_p_u16(uint64_t a, uint16x8_t b, uint16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaq_p(a, b, c, p);
#else
  return vmlaldavaq_p_u16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaq_p_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
uint64_t test_vmlaldavaq_p_u32(uint64_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaq_p(a, b, c, p);
#else
  return vmlaldavaq_p_u32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaxq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlaldavaxq_p_s16(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaxq_p(a, b, c, p);
#else
  return vmlaldavaxq_p_s16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavaxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlaldavaxq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavaxq_p(a, b, c, p);
#else
  return vmlaldavaxq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavaq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlsldavaq_p_s16(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavaq_p(a, b, c, p);
#else
  return vmlsldavaq_p_s16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavaq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlsldavaq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavaq_p(a, b, c, p);
#else
  return vmlsldavaq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavaxq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <8 x i16> [[B:%.*]], <8 x i16> [[C:%.*]], <8 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlsldavaxq_p_s16(int64_t a, int16x8_t b, int16x8_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavaxq_p(a, b, c, p);
#else
  return vmlsldavaxq_p_s16(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavaxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vmlsldavaxq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavaxq_p(a, b, c, p);
#else
  return vmlsldavaxq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vrmlaldavhaq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhaq_p(a, b, c, p);
#else
  return vrmlaldavhaq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaq_p_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
uint64_t test_vrmlaldavhaq_p_u32(uint64_t a, uint32x4_t b, uint32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhaq_p(a, b, c, p);
#else
  return vrmlaldavhaq_p_u32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhaxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vrmlaldavhaxq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhaxq_p(a, b, c, p);
#else
  return vrmlaldavhaxq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhaq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vrmlsldavhaq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlsldavhaq_p(a, b, c, p);
#else
  return vrmlsldavhaq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhaxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = lshr i64 [[A:%.*]], 32
// CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
// CHECK-NEXT:    [[TMP2:%.*]] = trunc i64 [[A]] to i32
// CHECK-NEXT:    [[TMP3:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP3]])
// CHECK-NEXT:    [[TMP5:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 [[TMP2]], i32 [[TMP1]], <4 x i32> [[B:%.*]], <4 x i32> [[C:%.*]], <4 x i1> [[TMP4]])
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP5]], 1
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = shl i64 [[TMP7]], 32
// CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { i32, i32 } [[TMP5]], 0
// CHECK-NEXT:    [[TMP10:%.*]] = zext i32 [[TMP9]] to i64
// CHECK-NEXT:    [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP10]]
// CHECK-NEXT:    ret i64 [[TMP11]]
//
int64_t test_vrmlsldavhaxq_p_s32(int64_t a, int32x4_t b, int32x4_t c, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlsldavhaxq_p(a, b, c, p);
#else
  return vrmlsldavhaxq_p_s32(a, b, c, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlaldavq_s16(int16x8_t a, int16x8_t b) {
#ifdef POLYMORPHIC
  return vmlaldavq(a, b);
#else
  return vmlaldavq_s16(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlaldavq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vmlaldavq(a, b);
#else
  return vmlaldavq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_u16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
uint64_t test_vmlaldavq_u16(uint16x8_t a, uint16x8_t b) {
#ifdef POLYMORPHIC
  return vmlaldavq(a, b);
#else
  return vmlaldavq_u16(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavq_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
uint64_t test_vmlaldavq_u32(uint32x4_t a, uint32x4_t b) {
#ifdef POLYMORPHIC
  return vmlaldavq(a, b);
#else
  return vmlaldavq_u32(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavxq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlaldavxq_s16(int16x8_t a, int16x8_t b) {
#ifdef POLYMORPHIC
  return vmlaldavxq(a, b);
#else
  return vmlaldavxq_s16(a, b);
#endif
}

// CHECK-LABEL: @test_vmlaldavxq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlaldavxq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vmlaldavxq(a, b);
#else
  return vmlaldavxq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vmlsldavq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlsldavq_s16(int16x8_t a, int16x8_t b) {
#ifdef POLYMORPHIC
  return vmlsldavq(a, b);
#else
  return vmlsldavq_s16(a, b);
#endif
}

// CHECK-LABEL: @test_vmlsldavq_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlsldavq_s32(int32x4_t a, int32x4_t b) {
#ifdef POLYMORPHIC
  return vmlsldavq(a, b);
#else
  return vmlsldavq_s32(a, b);
#endif
}

// CHECK-LABEL: @test_vmlsldavxq_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]])
// CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
// CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
// CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
// CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
// CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
// CHECK-NEXT:    ret i64 [[TMP6]]
//
int64_t test_vmlsldavxq_s16(int16x8_t a, int16x8_t b) {
#ifdef POLYMORPHIC
  return vmlsldavxq(a, b);
#else
  return vmlsldavxq_s16(a, b);
#endif
}
867 
868 // CHECK-LABEL: @test_vmlsldavxq_s32(
869 // CHECK-NEXT:  entry:
870 // CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.v4i32(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
871 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
872 // CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
873 // CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
874 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
875 // CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
876 // CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
877 // CHECK-NEXT:    ret i64 [[TMP6]]
878 //
test_vmlsldavxq_s32(int32x4_t a,int32x4_t b)879 int64_t test_vmlsldavxq_s32(int32x4_t a, int32x4_t b) {
880 #ifdef POLYMORPHIC
881   return vmlsldavxq(a, b);
882 #else
883   return vmlsldavxq_s32(a, b);
884 #endif
885 }
886 
887 // CHECK-LABEL: @test_vrmlaldavhq_s32(
888 // CHECK-NEXT:  entry:
889 // CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
890 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
891 // CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
892 // CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
893 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
894 // CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
895 // CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
896 // CHECK-NEXT:    ret i64 [[TMP6]]
897 //
test_vrmlaldavhq_s32(int32x4_t a,int32x4_t b)898 int64_t test_vrmlaldavhq_s32(int32x4_t a, int32x4_t b) {
899 #ifdef POLYMORPHIC
900   return vrmlaldavhq(a, b);
901 #else
902   return vrmlaldavhq_s32(a, b);
903 #endif
904 }
905 
906 // CHECK-LABEL: @test_vrmlaldavhq_u32(
907 // CHECK-NEXT:  entry:
908 // CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
909 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
910 // CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
911 // CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
912 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
913 // CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
914 // CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
915 // CHECK-NEXT:    ret i64 [[TMP6]]
916 //
test_vrmlaldavhq_u32(uint32x4_t a,uint32x4_t b)917 uint64_t test_vrmlaldavhq_u32(uint32x4_t a, uint32x4_t b) {
918 #ifdef POLYMORPHIC
919   return vrmlaldavhq(a, b);
920 #else
921   return vrmlaldavhq_u32(a, b);
922 #endif
923 }
924 
925 // CHECK-LABEL: @test_vrmlaldavhxq_s32(
926 // CHECK-NEXT:  entry:
927 // CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
928 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
929 // CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
930 // CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
931 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
932 // CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
933 // CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
934 // CHECK-NEXT:    ret i64 [[TMP6]]
935 //
test_vrmlaldavhxq_s32(int32x4_t a,int32x4_t b)936 int64_t test_vrmlaldavhxq_s32(int32x4_t a, int32x4_t b) {
937 #ifdef POLYMORPHIC
938   return vrmlaldavhxq(a, b);
939 #else
940   return vrmlaldavhxq_s32(a, b);
941 #endif
942 }
943 
944 // CHECK-LABEL: @test_vrmlsldavhq_s32(
945 // CHECK-NEXT:  entry:
946 // CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
947 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
948 // CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
949 // CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
950 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
951 // CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
952 // CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
953 // CHECK-NEXT:    ret i64 [[TMP6]]
954 //
test_vrmlsldavhq_s32(int32x4_t a,int32x4_t b)955 int64_t test_vrmlsldavhq_s32(int32x4_t a, int32x4_t b) {
956 #ifdef POLYMORPHIC
957   return vrmlsldavhq(a, b);
958 #else
959   return vrmlsldavhq_s32(a, b);
960 #endif
961 }
962 
963 // CHECK-LABEL: @test_vrmlsldavhxq_s32(
964 // CHECK-NEXT:  entry:
965 // CHECK-NEXT:    [[TMP0:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.v4i32(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]])
966 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { i32, i32 } [[TMP0]], 1
967 // CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
968 // CHECK-NEXT:    [[TMP3:%.*]] = shl i64 [[TMP2]], 32
969 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { i32, i32 } [[TMP0]], 0
970 // CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
971 // CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[TMP3]], [[TMP5]]
972 // CHECK-NEXT:    ret i64 [[TMP6]]
973 //
test_vrmlsldavhxq_s32(int32x4_t a,int32x4_t b)974 int64_t test_vrmlsldavhxq_s32(int32x4_t a, int32x4_t b) {
975 #ifdef POLYMORPHIC
976   return vrmlsldavhxq(a, b);
977 #else
978   return vrmlsldavhxq_s32(a, b);
979 #endif
980 }
981 
982 // CHECK-LABEL: @test_vmlaldavq_p_s16(
983 // CHECK-NEXT:  entry:
984 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
985 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
986 // CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
987 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
988 // CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
989 // CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
990 // CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
991 // CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
992 // CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
993 // CHECK-NEXT:    ret i64 [[TMP8]]
994 //
test_vmlaldavq_p_s16(int16x8_t a,int16x8_t b,mve_pred16_t p)995 int64_t test_vmlaldavq_p_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) {
996 #ifdef POLYMORPHIC
997   return vmlaldavq_p(a, b, p);
998 #else
999   return vmlaldavq_p_s16(a, b, p);
1000 #endif
1001 }
1002 
1003 // CHECK-LABEL: @test_vmlaldavq_p_s32(
1004 // CHECK-NEXT:  entry:
1005 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1006 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1007 // CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1008 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1009 // CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1010 // CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1011 // CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1012 // CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1013 // CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1014 // CHECK-NEXT:    ret i64 [[TMP8]]
1015 //
test_vmlaldavq_p_s32(int32x4_t a,int32x4_t b,mve_pred16_t p)1016 int64_t test_vmlaldavq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
1017 #ifdef POLYMORPHIC
1018   return vmlaldavq_p(a, b, p);
1019 #else
1020   return vmlaldavq_p_s32(a, b, p);
1021 #endif
1022 }
1023 
1024 // CHECK-LABEL: @test_vmlaldavq_p_u16(
1025 // CHECK-NEXT:  entry:
1026 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1027 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1028 // CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 1, i32 0, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
1029 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1030 // CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1031 // CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1032 // CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1033 // CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1034 // CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1035 // CHECK-NEXT:    ret i64 [[TMP8]]
1036 //
test_vmlaldavq_p_u16(uint16x8_t a,uint16x8_t b,mve_pred16_t p)1037 uint64_t test_vmlaldavq_p_u16(uint16x8_t a, uint16x8_t b, mve_pred16_t p) {
1038 #ifdef POLYMORPHIC
1039   return vmlaldavq_p(a, b, p);
1040 #else
1041   return vmlaldavq_p_u16(a, b, p);
1042 #endif
1043 }
1044 
1045 // CHECK-LABEL: @test_vmlaldavq_p_u32(
1046 // CHECK-NEXT:  entry:
1047 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1048 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1049 // CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
1050 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
1051 // CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1052 // CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
1053 // CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
1054 // CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
1055 // CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
1056 // CHECK-NEXT:    ret i64 [[TMP8]]
1057 //
test_vmlaldavq_p_u32(uint32x4_t a,uint32x4_t b,mve_pred16_t p)1058 uint64_t test_vmlaldavq_p_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) {
1059 #ifdef POLYMORPHIC
1060   return vmlaldavq_p(a, b, p);
1061 #else
1062   return vmlaldavq_p_u32(a, b, p);
1063 #endif
1064 }

// CHECK-LABEL: @test_vmlaldavxq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 0, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlaldavxq_p_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavxq_p(a, b, p);
#else
  return vmlaldavxq_p_s16(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlaldavxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlaldavxq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlaldavxq_p(a, b, p);
#else
  return vmlaldavxq_p_s32(a, b, p);
#endif
}
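
// The vmlaldavxq_p tests above flip the third immediate to i32 1, which
// appears to select the exchanged (cross-multiplied) form of the reduction.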

// CHECK-LABEL: @test_vmlsldavq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 0, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlsldavq_p_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavq_p(a, b, p);
#else
  return vmlsldavq_p_s16(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlsldavq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavq_p(a, b, p);
#else
  return vmlsldavq_p_s32(a, b, p);
#endif
}
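
// The vmlsldavq_p tests above flip the second immediate to i32 1, which
// appears to select subtraction of the products instead of addition; the
// vmlsldavxq_p tests below set both the subtract and exchange flags.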

// CHECK-LABEL: @test_vmlsldavxq_p_s16(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v8i16.v8i1(i32 0, i32 1, i32 1, i32 0, i32 0, <8 x i16> [[A:%.*]], <8 x i16> [[B:%.*]], <8 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlsldavxq_p_s16(int16x8_t a, int16x8_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavxq_p(a, b, p);
#else
  return vmlsldavxq_p_s16(a, b, p);
#endif
}

// CHECK-LABEL: @test_vmlsldavxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vmlldava.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vmlsldavxq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vmlsldavxq_p(a, b, p);
#else
  return vmlsldavxq_p_s32(a, b, p);
#endif
}
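
// The remaining tests exercise the high-half rounding reductions, which
// lower to @llvm.arm.mve.vrmlldavha.predicated with what appears to be the
// same immediate flag layout as the vmlldava intrinsic above.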

// CHECK-LABEL: @test_vrmlaldavhq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vrmlaldavhq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhq_p(a, b, p);
#else
  return vrmlaldavhq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhq_p_u32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 1, i32 0, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
uint64_t test_vrmlaldavhq_p_u32(uint32x4_t a, uint32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhq_p(a, b, p);
#else
  return vrmlaldavhq_p_u32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vrmlaldavhxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 0, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vrmlaldavhxq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlaldavhxq_p(a, b, p);
#else
  return vrmlaldavhxq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 0, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vrmlsldavhq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlsldavhq_p(a, b, p);
#else
  return vrmlsldavhq_p_s32(a, b, p);
#endif
}

// CHECK-LABEL: @test_vrmlsldavhxq_p_s32(
// CHECK-NEXT:  entry:
// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
// CHECK-NEXT:    [[TMP2:%.*]] = call { i32, i32 } @llvm.arm.mve.vrmlldavha.predicated.v4i32.v4i1(i32 0, i32 1, i32 1, i32 0, i32 0, <4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i1> [[TMP1]])
// CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { i32, i32 } [[TMP2]], 1
// CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
// CHECK-NEXT:    [[TMP5:%.*]] = shl i64 [[TMP4]], 32
// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue { i32, i32 } [[TMP2]], 0
// CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
// CHECK-NEXT:    [[TMP8:%.*]] = or i64 [[TMP5]], [[TMP7]]
// CHECK-NEXT:    ret i64 [[TMP8]]
//
int64_t test_vrmlsldavhxq_p_s32(int32x4_t a, int32x4_t b, mve_pred16_t p) {
#ifdef POLYMORPHIC
  return vrmlsldavhxq_p(a, b, p);
#else
  return vrmlsldavhxq_p_s32(a, b, p);
#endif
}