// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
// RUN: %clang_cc1 -triple thumbv8.1m.main-none-none-eabi -target-feature +mve -mfloat-abi hard -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s

#include <arm_mve.h>

7 // CHECK-LABEL: @test_vidupq_n_u8(
8 // CHECK-NEXT:  entry:
9 // CHECK-NEXT:    [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32 [[A:%.*]], i32 4)
10 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
11 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
12 //
test_vidupq_n_u8(uint32_t a)13 uint8x16_t test_vidupq_n_u8(uint32_t a)
14 {
15 #ifdef POLYMORPHIC
16     return vidupq_u8(a, 4);
17 #else /* POLYMORPHIC */
18     return vidupq_n_u8(a, 4);
19 #endif /* POLYMORPHIC */
20 }
21 
22 // CHECK-LABEL: @test_vidupq_n_u16(
23 // CHECK-NEXT:  entry:
24 // CHECK-NEXT:    [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 [[A:%.*]], i32 1)
25 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
26 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
27 //
test_vidupq_n_u16(uint32_t a)28 uint16x8_t test_vidupq_n_u16(uint32_t a)
29 {
30 #ifdef POLYMORPHIC
31     return vidupq_u16(a, 1);
32 #else /* POLYMORPHIC */
33     return vidupq_n_u16(a, 1);
34 #endif /* POLYMORPHIC */
35 }
36 
37 // CHECK-LABEL: @test_vidupq_n_u32(
38 // CHECK-NEXT:  entry:
39 // CHECK-NEXT:    [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 [[A:%.*]], i32 4)
40 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
41 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
42 //
test_vidupq_n_u32(uint32_t a)43 uint32x4_t test_vidupq_n_u32(uint32_t a)
44 {
45 #ifdef POLYMORPHIC
46     return vidupq_u32(a, 4);
47 #else /* POLYMORPHIC */
48     return vidupq_n_u32(a, 4);
49 #endif /* POLYMORPHIC */
50 }
51 
52 // CHECK-LABEL: @test_vddupq_n_u8(
53 // CHECK-NEXT:  entry:
54 // CHECK-NEXT:    [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.v16i8(i32 [[A:%.*]], i32 2)
55 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
56 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
57 //
test_vddupq_n_u8(uint32_t a)58 uint8x16_t test_vddupq_n_u8(uint32_t a)
59 {
60 #ifdef POLYMORPHIC
61     return vddupq_u8(a, 2);
62 #else /* POLYMORPHIC */
63     return vddupq_n_u8(a, 2);
64 #endif /* POLYMORPHIC */
65 }
66 
67 // CHECK-LABEL: @test_vddupq_n_u16(
68 // CHECK-NEXT:  entry:
69 // CHECK-NEXT:    [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 [[A:%.*]], i32 4)
70 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
71 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
72 //
test_vddupq_n_u16(uint32_t a)73 uint16x8_t test_vddupq_n_u16(uint32_t a)
74 {
75 #ifdef POLYMORPHIC
76     return vddupq_u16(a, 4);
77 #else /* POLYMORPHIC */
78     return vddupq_n_u16(a, 4);
79 #endif /* POLYMORPHIC */
80 }
81 
82 // CHECK-LABEL: @test_vddupq_n_u32(
83 // CHECK-NEXT:  entry:
84 // CHECK-NEXT:    [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.v4i32(i32 [[A:%.*]], i32 2)
85 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
86 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
87 //
test_vddupq_n_u32(uint32_t a)88 uint32x4_t test_vddupq_n_u32(uint32_t a)
89 {
90 #ifdef POLYMORPHIC
91     return vddupq_u32(a, 2);
92 #else /* POLYMORPHIC */
93     return vddupq_n_u32(a, 2);
94 #endif /* POLYMORPHIC */
95 }
96 
97 // CHECK-LABEL: @test_viwdupq_n_u8(
98 // CHECK-NEXT:  entry:
99 // CHECK-NEXT:    [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.v16i8(i32 [[A:%.*]], i32 [[B:%.*]], i32 4)
100 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
101 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
102 //
test_viwdupq_n_u8(uint32_t a,uint32_t b)103 uint8x16_t test_viwdupq_n_u8(uint32_t a, uint32_t b)
104 {
105 #ifdef POLYMORPHIC
106     return viwdupq_u8(a, b, 4);
107 #else /* POLYMORPHIC */
108     return viwdupq_n_u8(a, b, 4);
109 #endif /* POLYMORPHIC */
110 }
111 
112 // CHECK-LABEL: @test_viwdupq_n_u16(
113 // CHECK-NEXT:  entry:
114 // CHECK-NEXT:    [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 [[A:%.*]], i32 [[B:%.*]], i32 2)
115 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
116 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
117 //
test_viwdupq_n_u16(uint32_t a,uint32_t b)118 uint16x8_t test_viwdupq_n_u16(uint32_t a, uint32_t b)
119 {
120 #ifdef POLYMORPHIC
121     return viwdupq_u16(a, b, 2);
122 #else /* POLYMORPHIC */
123     return viwdupq_n_u16(a, b, 2);
124 #endif /* POLYMORPHIC */
125 }
126 
127 // CHECK-LABEL: @test_viwdupq_n_u32(
128 // CHECK-NEXT:  entry:
129 // CHECK-NEXT:    [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.v4i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 8)
130 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
131 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
132 //
test_viwdupq_n_u32(uint32_t a,uint32_t b)133 uint32x4_t test_viwdupq_n_u32(uint32_t a, uint32_t b)
134 {
135 #ifdef POLYMORPHIC
136     return viwdupq_u32(a, b, 8);
137 #else /* POLYMORPHIC */
138     return viwdupq_n_u32(a, b, 8);
139 #endif /* POLYMORPHIC */
140 }
141 
142 // CHECK-LABEL: @test_vdwdupq_n_u8(
143 // CHECK-NEXT:  entry:
144 // CHECK-NEXT:    [[TMP0:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.v16i8(i32 [[A:%.*]], i32 [[B:%.*]], i32 4)
145 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP0]], 0
146 // CHECK-NEXT:    ret <16 x i8> [[TMP1]]
147 //
test_vdwdupq_n_u8(uint32_t a,uint32_t b)148 uint8x16_t test_vdwdupq_n_u8(uint32_t a, uint32_t b)
149 {
150 #ifdef POLYMORPHIC
151     return vdwdupq_u8(a, b, 4);
152 #else /* POLYMORPHIC */
153     return vdwdupq_n_u8(a, b, 4);
154 #endif /* POLYMORPHIC */
155 }
156 
157 // CHECK-LABEL: @test_vdwdupq_n_u16(
158 // CHECK-NEXT:  entry:
159 // CHECK-NEXT:    [[TMP0:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.v8i16(i32 [[A:%.*]], i32 [[B:%.*]], i32 8)
160 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP0]], 0
161 // CHECK-NEXT:    ret <8 x i16> [[TMP1]]
162 //
test_vdwdupq_n_u16(uint32_t a,uint32_t b)163 uint16x8_t test_vdwdupq_n_u16(uint32_t a, uint32_t b)
164 {
165 #ifdef POLYMORPHIC
166     return vdwdupq_u16(a, b, 8);
167 #else /* POLYMORPHIC */
168     return vdwdupq_n_u16(a, b, 8);
169 #endif /* POLYMORPHIC */
170 }
171 
172 // CHECK-LABEL: @test_vdwdupq_n_u32(
173 // CHECK-NEXT:  entry:
174 // CHECK-NEXT:    [[TMP0:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.v4i32(i32 [[A:%.*]], i32 [[B:%.*]], i32 1)
175 // CHECK-NEXT:    [[TMP1:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP0]], 0
176 // CHECK-NEXT:    ret <4 x i32> [[TMP1]]
177 //
test_vdwdupq_n_u32(uint32_t a,uint32_t b)178 uint32x4_t test_vdwdupq_n_u32(uint32_t a, uint32_t b)
179 {
180 #ifdef POLYMORPHIC
181     return vdwdupq_u32(a, b, 1);
182 #else /* POLYMORPHIC */
183     return vdwdupq_n_u32(a, b, 1);
184 #endif /* POLYMORPHIC */
185 }
186 
187 // CHECK-LABEL: @test_vidupq_wb_u8(
188 // CHECK-NEXT:  entry:
189 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
190 // CHECK-NEXT:    [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.v16i8(i32 [[TMP0]], i32 8)
191 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
192 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
193 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
194 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
195 //
test_vidupq_wb_u8(uint32_t * a)196 uint8x16_t test_vidupq_wb_u8(uint32_t *a)
197 {
198 #ifdef POLYMORPHIC
199     return vidupq_u8(a, 8);
200 #else /* POLYMORPHIC */
201     return vidupq_wb_u8(a, 8);
202 #endif /* POLYMORPHIC */
203 }
204 
205 // CHECK-LABEL: @test_vidupq_wb_u16(
206 // CHECK-NEXT:  entry:
207 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
208 // CHECK-NEXT:    [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.v8i16(i32 [[TMP0]], i32 1)
209 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
210 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
211 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
212 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
213 //
test_vidupq_wb_u16(uint32_t * a)214 uint16x8_t test_vidupq_wb_u16(uint32_t *a)
215 {
216 #ifdef POLYMORPHIC
217     return vidupq_u16(a, 1);
218 #else /* POLYMORPHIC */
219     return vidupq_wb_u16(a, 1);
220 #endif /* POLYMORPHIC */
221 }
222 
223 // CHECK-LABEL: @test_vidupq_wb_u32(
224 // CHECK-NEXT:  entry:
225 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
226 // CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.v4i32(i32 [[TMP0]], i32 4)
227 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
228 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
229 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
230 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
231 //
test_vidupq_wb_u32(uint32_t * a)232 uint32x4_t test_vidupq_wb_u32(uint32_t *a)
233 {
234 #ifdef POLYMORPHIC
235     return vidupq_u32(a, 4);
236 #else /* POLYMORPHIC */
237     return vidupq_wb_u32(a, 4);
238 #endif /* POLYMORPHIC */
239 }
240 
241 // CHECK-LABEL: @test_vddupq_wb_u8(
242 // CHECK-NEXT:  entry:
243 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
244 // CHECK-NEXT:    [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.v16i8(i32 [[TMP0]], i32 2)
245 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
246 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
247 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
248 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
249 //
test_vddupq_wb_u8(uint32_t * a)250 uint8x16_t test_vddupq_wb_u8(uint32_t *a)
251 {
252 #ifdef POLYMORPHIC
253     return vddupq_u8(a, 2);
254 #else /* POLYMORPHIC */
255     return vddupq_wb_u8(a, 2);
256 #endif /* POLYMORPHIC */
257 }
258 
259 // CHECK-LABEL: @test_vddupq_wb_u16(
260 // CHECK-NEXT:  entry:
261 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
262 // CHECK-NEXT:    [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.v8i16(i32 [[TMP0]], i32 8)
263 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
264 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
265 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
266 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
267 //
test_vddupq_wb_u16(uint32_t * a)268 uint16x8_t test_vddupq_wb_u16(uint32_t *a)
269 {
270 #ifdef POLYMORPHIC
271     return vddupq_u16(a, 8);
272 #else /* POLYMORPHIC */
273     return vddupq_wb_u16(a, 8);
274 #endif /* POLYMORPHIC */
275 }
276 
277 // CHECK-LABEL: @test_vddupq_wb_u32(
278 // CHECK-NEXT:  entry:
279 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
280 // CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.v4i32(i32 [[TMP0]], i32 2)
281 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
282 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
283 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
284 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
285 //
test_vddupq_wb_u32(uint32_t * a)286 uint32x4_t test_vddupq_wb_u32(uint32_t *a)
287 {
288 #ifdef POLYMORPHIC
289     return vddupq_u32(a, 2);
290 #else /* POLYMORPHIC */
291     return vddupq_wb_u32(a, 2);
292 #endif /* POLYMORPHIC */
293 }
294 
295 // CHECK-LABEL: @test_vdwdupq_wb_u8(
296 // CHECK-NEXT:  entry:
297 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
298 // CHECK-NEXT:    [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.v16i8(i32 [[TMP0]], i32 [[B:%.*]], i32 4)
299 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
300 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
301 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
302 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
303 //
test_vdwdupq_wb_u8(uint32_t * a,uint32_t b)304 uint8x16_t test_vdwdupq_wb_u8(uint32_t *a, uint32_t b)
305 {
306 #ifdef POLYMORPHIC
307     return vdwdupq_u8(a, b, 4);
308 #else /* POLYMORPHIC */
309     return vdwdupq_wb_u8(a, b, 4);
310 #endif /* POLYMORPHIC */
311 }
312 
313 // CHECK-LABEL: @test_vdwdupq_wb_u16(
314 // CHECK-NEXT:  entry:
315 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
316 // CHECK-NEXT:    [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.v8i16(i32 [[TMP0]], i32 [[B:%.*]], i32 4)
317 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
318 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
319 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
320 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
321 //
test_vdwdupq_wb_u16(uint32_t * a,uint32_t b)322 uint16x8_t test_vdwdupq_wb_u16(uint32_t *a, uint32_t b)
323 {
324 #ifdef POLYMORPHIC
325     return vdwdupq_u16(a, b, 4);
326 #else /* POLYMORPHIC */
327     return vdwdupq_wb_u16(a, b, 4);
328 #endif /* POLYMORPHIC */
329 }
330 
331 // CHECK-LABEL: @test_viwdupq_wb_u8(
332 // CHECK-NEXT:  entry:
333 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
334 // CHECK-NEXT:    [[TMP1:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.v16i8(i32 [[TMP0]], i32 [[B:%.*]], i32 1)
335 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 1
336 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
337 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP1]], 0
338 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
339 //
test_viwdupq_wb_u8(uint32_t * a,uint32_t b)340 uint8x16_t test_viwdupq_wb_u8(uint32_t *a, uint32_t b)
341 {
342 #ifdef POLYMORPHIC
343     return viwdupq_u8(a, b, 1);
344 #else /* POLYMORPHIC */
345     return viwdupq_wb_u8(a, b, 1);
346 #endif /* POLYMORPHIC */
347 }
348 
349 // CHECK-LABEL: @test_viwdupq_wb_u16(
350 // CHECK-NEXT:  entry:
351 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
352 // CHECK-NEXT:    [[TMP1:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.v8i16(i32 [[TMP0]], i32 [[B:%.*]], i32 1)
353 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 1
354 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
355 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP1]], 0
356 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
357 //
test_viwdupq_wb_u16(uint32_t * a,uint32_t b)358 uint16x8_t test_viwdupq_wb_u16(uint32_t *a, uint32_t b)
359 {
360 #ifdef POLYMORPHIC
361     return viwdupq_u16(a, b, 1);
362 #else /* POLYMORPHIC */
363     return viwdupq_wb_u16(a, b, 1);
364 #endif /* POLYMORPHIC */
365 }
366 
367 // CHECK-LABEL: @test_viwdupq_wb_u32(
368 // CHECK-NEXT:  entry:
369 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
370 // CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.v4i32(i32 [[TMP0]], i32 [[B:%.*]], i32 8)
371 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
372 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
373 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
374 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
375 //
test_viwdupq_wb_u32(uint32_t * a,uint32_t b)376 uint32x4_t test_viwdupq_wb_u32(uint32_t *a, uint32_t b)
377 {
378 #ifdef POLYMORPHIC
379     return viwdupq_u32(a, b, 8);
380 #else /* POLYMORPHIC */
381     return viwdupq_wb_u32(a, b, 8);
382 #endif /* POLYMORPHIC */
383 }
384 
385 // CHECK-LABEL: @test_vdwdupq_wb_u32(
386 // CHECK-NEXT:  entry:
387 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
388 // CHECK-NEXT:    [[TMP1:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.v4i32(i32 [[TMP0]], i32 [[B:%.*]], i32 2)
389 // CHECK-NEXT:    [[TMP2:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 1
390 // CHECK-NEXT:    store i32 [[TMP2]], i32* [[A]], align 4
391 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP1]], 0
392 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
393 //
test_vdwdupq_wb_u32(uint32_t * a,uint32_t b)394 uint32x4_t test_vdwdupq_wb_u32(uint32_t *a, uint32_t b)
395 {
396 #ifdef POLYMORPHIC
397     return vdwdupq_u32(a, b, 2);
398 #else /* POLYMORPHIC */
399     return vdwdupq_wb_u32(a, b, 2);
400 #endif /* POLYMORPHIC */
401 }
402 
403 // CHECK-LABEL: @test_vidupq_m_n_u8(
404 // CHECK-NEXT:  entry:
405 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
406 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
407 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
408 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
409 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
410 //
test_vidupq_m_n_u8(uint8x16_t inactive,uint32_t a,mve_pred16_t p)411 uint8x16_t test_vidupq_m_n_u8(uint8x16_t inactive, uint32_t a, mve_pred16_t p)
412 {
413 #ifdef POLYMORPHIC
414     return vidupq_m(inactive, a, 8, p);
415 #else /* POLYMORPHIC */
416     return vidupq_m_n_u8(inactive, a, 8, p);
417 #endif /* POLYMORPHIC */
418 }
419 
420 // CHECK-LABEL: @test_vidupq_m_n_u16(
421 // CHECK-NEXT:  entry:
422 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
423 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
424 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <8 x i1> [[TMP1]])
425 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
426 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
427 //
test_vidupq_m_n_u16(uint16x8_t inactive,uint32_t a,mve_pred16_t p)428 uint16x8_t test_vidupq_m_n_u16(uint16x8_t inactive, uint32_t a, mve_pred16_t p)
429 {
430 #ifdef POLYMORPHIC
431     return vidupq_m(inactive, a, 8, p);
432 #else /* POLYMORPHIC */
433     return vidupq_m_n_u16(inactive, a, 8, p);
434 #endif /* POLYMORPHIC */
435 }
436 
437 // CHECK-LABEL: @test_vidupq_m_n_u32(
438 // CHECK-NEXT:  entry:
439 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
440 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
441 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 2, <4 x i1> [[TMP1]])
442 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
443 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
444 //
test_vidupq_m_n_u32(uint32x4_t inactive,uint32_t a,mve_pred16_t p)445 uint32x4_t test_vidupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
446 {
447 #ifdef POLYMORPHIC
448     return vidupq_m(inactive, a, 2, p);
449 #else /* POLYMORPHIC */
450     return vidupq_m_n_u32(inactive, a, 2, p);
451 #endif /* POLYMORPHIC */
452 }
453 
454 // CHECK-LABEL: @test_vddupq_m_n_u8(
455 // CHECK-NEXT:  entry:
456 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
457 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
458 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
459 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
460 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
461 //
test_vddupq_m_n_u8(uint8x16_t inactive,uint32_t a,mve_pred16_t p)462 uint8x16_t test_vddupq_m_n_u8(uint8x16_t inactive, uint32_t a, mve_pred16_t p)
463 {
464 #ifdef POLYMORPHIC
465     return vddupq_m(inactive, a, 8, p);
466 #else /* POLYMORPHIC */
467     return vddupq_m_n_u8(inactive, a, 8, p);
468 #endif /* POLYMORPHIC */
469 }
470 
471 // CHECK-LABEL: @test_vddupq_m_n_u16(
472 // CHECK-NEXT:  entry:
473 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
474 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
475 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 2, <8 x i1> [[TMP1]])
476 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
477 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
478 //
test_vddupq_m_n_u16(uint16x8_t inactive,uint32_t a,mve_pred16_t p)479 uint16x8_t test_vddupq_m_n_u16(uint16x8_t inactive, uint32_t a, mve_pred16_t p)
480 {
481 #ifdef POLYMORPHIC
482     return vddupq_m(inactive, a, 2, p);
483 #else /* POLYMORPHIC */
484     return vddupq_m_n_u16(inactive, a, 2, p);
485 #endif /* POLYMORPHIC */
486 }
487 
488 // CHECK-LABEL: @test_vddupq_m_n_u32(
489 // CHECK-NEXT:  entry:
490 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
491 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
492 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 8, <4 x i1> [[TMP1]])
493 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
494 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
495 //
test_vddupq_m_n_u32(uint32x4_t inactive,uint32_t a,mve_pred16_t p)496 uint32x4_t test_vddupq_m_n_u32(uint32x4_t inactive, uint32_t a, mve_pred16_t p)
497 {
498 #ifdef POLYMORPHIC
499     return vddupq_m(inactive, a, 8, p);
500 #else /* POLYMORPHIC */
501     return vddupq_m_n_u32(inactive, a, 8, p);
502 #endif /* POLYMORPHIC */
503 }
504 
505 // CHECK-LABEL: @test_viwdupq_m_n_u8(
506 // CHECK-NEXT:  entry:
507 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
508 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
509 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <16 x i1> [[TMP1]])
510 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
511 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
512 //
test_viwdupq_m_n_u8(uint8x16_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)513 uint8x16_t test_viwdupq_m_n_u8(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
514 {
515 #ifdef POLYMORPHIC
516     return viwdupq_m(inactive, a, b, 8, p);
517 #else /* POLYMORPHIC */
518     return viwdupq_m_n_u8(inactive, a, b, 8, p);
519 #endif /* POLYMORPHIC */
520 }
521 
522 // CHECK-LABEL: @test_viwdupq_m_n_u16(
523 // CHECK-NEXT:  entry:
524 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
525 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
526 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <8 x i1> [[TMP1]])
527 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
528 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
529 //
test_viwdupq_m_n_u16(uint16x8_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)530 uint16x8_t test_viwdupq_m_n_u16(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
531 {
532 #ifdef POLYMORPHIC
533     return viwdupq_m(inactive, a, b, 8, p);
534 #else /* POLYMORPHIC */
535     return viwdupq_m_n_u16(inactive, a, b, 8, p);
536 #endif /* POLYMORPHIC */
537 }
538 
539 // CHECK-LABEL: @test_viwdupq_m_n_u32(
540 // CHECK-NEXT:  entry:
541 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
542 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
543 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP1]])
544 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
545 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
546 //
test_viwdupq_m_n_u32(uint32x4_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)547 uint32x4_t test_viwdupq_m_n_u32(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
548 {
549 #ifdef POLYMORPHIC
550     return viwdupq_m(inactive, a, b, 4, p);
551 #else /* POLYMORPHIC */
552     return viwdupq_m_n_u32(inactive, a, b, 4, p);
553 #endif /* POLYMORPHIC */
554 }
555 
556 // CHECK-LABEL: @test_vdwdupq_m_n_u8(
557 // CHECK-NEXT:  entry:
558 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
559 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
560 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP1]])
561 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
562 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
563 //
test_vdwdupq_m_n_u8(uint8x16_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)564 uint8x16_t test_vdwdupq_m_n_u8(uint8x16_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
565 {
566 #ifdef POLYMORPHIC
567     return vdwdupq_m(inactive, a, b, 1, p);
568 #else /* POLYMORPHIC */
569     return vdwdupq_m_n_u8(inactive, a, b, 1, p);
570 #endif /* POLYMORPHIC */
571 }
572 
573 // CHECK-LABEL: @test_vdwdupq_m_n_u16(
574 // CHECK-NEXT:  entry:
575 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
576 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
577 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP1]])
578 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
579 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
580 //
test_vdwdupq_m_n_u16(uint16x8_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)581 uint16x8_t test_vdwdupq_m_n_u16(uint16x8_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
582 {
583 #ifdef POLYMORPHIC
584     return vdwdupq_m(inactive, a, b, 2, p);
585 #else /* POLYMORPHIC */
586     return vdwdupq_m_n_u16(inactive, a, b, 2, p);
587 #endif /* POLYMORPHIC */
588 }
589 
590 // CHECK-LABEL: @test_vdwdupq_m_n_u32(
591 // CHECK-NEXT:  entry:
592 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
593 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
594 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP1]])
595 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
596 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
597 //
test_vdwdupq_m_n_u32(uint32x4_t inactive,uint32_t a,uint32_t b,mve_pred16_t p)598 uint32x4_t test_vdwdupq_m_n_u32(uint32x4_t inactive, uint32_t a, uint32_t b, mve_pred16_t p)
599 {
600 #ifdef POLYMORPHIC
601     return vdwdupq_m(inactive, a, b, 4, p);
602 #else /* POLYMORPHIC */
603     return vdwdupq_m_n_u32(inactive, a, b, 4, p);
604 #endif /* POLYMORPHIC */
605 }
606 
607 // CHECK-LABEL: @test_vidupq_m_wb_u8(
608 // CHECK-NEXT:  entry:
609 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
610 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
611 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
612 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 8, <16 x i1> [[TMP2]])
613 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
614 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
615 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
616 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
617 //
test_vidupq_m_wb_u8(uint8x16_t inactive,uint32_t * a,mve_pred16_t p)618 uint8x16_t test_vidupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, mve_pred16_t p)
619 {
620 #ifdef POLYMORPHIC
621     return vidupq_m(inactive, a, 8, p);
622 #else /* POLYMORPHIC */
623     return vidupq_m_wb_u8(inactive, a, 8, p);
624 #endif /* POLYMORPHIC */
625 }
626 
627 // CHECK-LABEL: @test_vidupq_m_wb_u16(
628 // CHECK-NEXT:  entry:
629 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
630 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
631 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
632 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 2, <8 x i1> [[TMP2]])
633 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
634 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
635 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
636 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
637 //
test_vidupq_m_wb_u16(uint16x8_t inactive,uint32_t * a,mve_pred16_t p)638 uint16x8_t test_vidupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
639 {
640 #ifdef POLYMORPHIC
641     return vidupq_m(inactive, a, 2, p);
642 #else /* POLYMORPHIC */
643     return vidupq_m_wb_u16(inactive, a, 2, p);
644 #endif /* POLYMORPHIC */
645 }
646 
647 // CHECK-LABEL: @test_vidupq_m_wb_u32(
648 // CHECK-NEXT:  entry:
649 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
650 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
651 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
652 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 8, <4 x i1> [[TMP2]])
653 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
654 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
655 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
656 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
657 //
test_vidupq_m_wb_u32(uint32x4_t inactive,uint32_t * a,mve_pred16_t p)658 uint32x4_t test_vidupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
659 {
660 #ifdef POLYMORPHIC
661     return vidupq_m(inactive, a, 8, p);
662 #else /* POLYMORPHIC */
663     return vidupq_m_wb_u32(inactive, a, 8, p);
664 #endif /* POLYMORPHIC */
665 }
666 
667 // CHECK-LABEL: @test_vddupq_m_wb_u8(
668 // CHECK-NEXT:  entry:
669 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
670 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
671 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
672 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 1, <16 x i1> [[TMP2]])
673 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
674 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
675 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
676 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
677 //
test_vddupq_m_wb_u8(uint8x16_t inactive,uint32_t * a,mve_pred16_t p)678 uint8x16_t test_vddupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, mve_pred16_t p)
679 {
680 #ifdef POLYMORPHIC
681     return vddupq_m(inactive, a, 1, p);
682 #else /* POLYMORPHIC */
683     return vddupq_m_wb_u8(inactive, a, 1, p);
684 #endif /* POLYMORPHIC */
685 }
686 
687 // CHECK-LABEL: @test_vddupq_m_wb_u16(
688 // CHECK-NEXT:  entry:
689 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
690 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
691 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
692 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 1, <8 x i1> [[TMP2]])
693 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
694 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
695 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
696 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
697 //
test_vddupq_m_wb_u16(uint16x8_t inactive,uint32_t * a,mve_pred16_t p)698 uint16x8_t test_vddupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, mve_pred16_t p)
699 {
    /* Codegen test for the merging, pointer-argument form of vddupq on
     * 8 x i16 with immediate 1. The overloaded spelling vddupq_m is compiled
     * when POLYMORPHIC is defined, the explicit vddupq_m_wb_u16 otherwise.
     * The FileCheck assertions before this function apply to both builds:
     * load *a, convert p to <8 x i1>, one call to
     * @llvm.arm.mve.vddup.predicated.v8i16.v8i1, store result element 1
     * back through a, return element 0. */
700 #ifdef POLYMORPHIC
701     return vddupq_m(inactive, a, 1, p);
702 #else /* POLYMORPHIC */
703     return vddupq_m_wb_u16(inactive, a, 1, p);
704 #endif /* POLYMORPHIC */
705 }
706 
707 // CHECK-LABEL: @test_vddupq_m_wb_u32(
708 // CHECK-NEXT:  entry:
709 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
710 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
711 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
712 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 4, <4 x i1> [[TMP2]])
713 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
714 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
715 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
716 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
717 //
test_vddupq_m_wb_u32(uint32x4_t inactive,uint32_t * a,mve_pred16_t p)718 uint32x4_t test_vddupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, mve_pred16_t p)
719 {
    /* Codegen test for the merging, pointer-argument form of vddupq on
     * 4 x i32 with immediate 4. The overloaded spelling vddupq_m is compiled
     * when POLYMORPHIC is defined, the explicit vddupq_m_wb_u32 otherwise.
     * The FileCheck assertions before this function apply to both builds:
     * load *a, convert p to <4 x i1>, one call to
     * @llvm.arm.mve.vddup.predicated.v4i32.v4i1, store result element 1
     * back through a, return element 0. */
720 #ifdef POLYMORPHIC
721     return vddupq_m(inactive, a, 4, p);
722 #else /* POLYMORPHIC */
723     return vddupq_m_wb_u32(inactive, a, 4, p);
724 #endif /* POLYMORPHIC */
725 }
726 
727 // CHECK-LABEL: @test_viwdupq_m_wb_u8(
728 // CHECK-NEXT:  entry:
729 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
730 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
731 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
732 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 8, <16 x i1> [[TMP2]])
733 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
734 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
735 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
736 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
737 //
test_viwdupq_m_wb_u8(uint8x16_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)738 uint8x16_t test_viwdupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
739 {
    /* Codegen test for the merging, pointer-argument form of viwdupq on
     * 16 x i8 with second scalar b and immediate 8. The overloaded spelling
     * viwdupq_m is compiled when POLYMORPHIC is defined, the explicit
     * viwdupq_m_wb_u8 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <16 x i1>, one
     * call to @llvm.arm.mve.viwdup.predicated.v16i8.v16i1, store result
     * element 1 back through a, return element 0. */
740 #ifdef POLYMORPHIC
741     return viwdupq_m(inactive, a, b, 8, p);
742 #else /* POLYMORPHIC */
743     return viwdupq_m_wb_u8(inactive, a, b, 8, p);
744 #endif /* POLYMORPHIC */
745 }
746 
747 // CHECK-LABEL: @test_viwdupq_m_wb_u16(
748 // CHECK-NEXT:  entry:
749 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
750 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
751 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
752 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 8, <8 x i1> [[TMP2]])
753 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
754 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
755 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
756 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
757 //
test_viwdupq_m_wb_u16(uint16x8_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)758 uint16x8_t test_viwdupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
759 {
    /* Codegen test for the merging, pointer-argument form of viwdupq on
     * 8 x i16 with second scalar b and immediate 8. The overloaded spelling
     * viwdupq_m is compiled when POLYMORPHIC is defined, the explicit
     * viwdupq_m_wb_u16 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <8 x i1>, one
     * call to @llvm.arm.mve.viwdup.predicated.v8i16.v8i1, store result
     * element 1 back through a, return element 0. */
760 #ifdef POLYMORPHIC
761     return viwdupq_m(inactive, a, b, 8, p);
762 #else /* POLYMORPHIC */
763     return viwdupq_m_wb_u16(inactive, a, b, 8, p);
764 #endif /* POLYMORPHIC */
765 }
766 
767 // CHECK-LABEL: @test_viwdupq_m_wb_u32(
768 // CHECK-NEXT:  entry:
769 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
770 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
771 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
772 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
773 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
774 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
775 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
776 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
777 //
test_viwdupq_m_wb_u32(uint32x4_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)778 uint32x4_t test_viwdupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
779 {
    /* Codegen test for the merging, pointer-argument form of viwdupq on
     * 4 x i32 with second scalar b and immediate 4. The overloaded spelling
     * viwdupq_m is compiled when POLYMORPHIC is defined, the explicit
     * viwdupq_m_wb_u32 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <4 x i1>, one
     * call to @llvm.arm.mve.viwdup.predicated.v4i32.v4i1, store result
     * element 1 back through a, return element 0. */
780 #ifdef POLYMORPHIC
781     return viwdupq_m(inactive, a, b, 4, p);
782 #else /* POLYMORPHIC */
783     return viwdupq_m_wb_u32(inactive, a, b, 4, p);
784 #endif /* POLYMORPHIC */
785 }
786 
787 // CHECK-LABEL: @test_vdwdupq_m_wb_u8(
788 // CHECK-NEXT:  entry:
789 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
790 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
791 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
792 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP2]])
793 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
794 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
795 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
796 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
797 //
test_vdwdupq_m_wb_u8(uint8x16_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)798 uint8x16_t test_vdwdupq_m_wb_u8(uint8x16_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
799 {
    /* Codegen test for the merging, pointer-argument form of vdwdupq on
     * 16 x i8 with second scalar b and immediate 1. The overloaded spelling
     * vdwdupq_m is compiled when POLYMORPHIC is defined, the explicit
     * vdwdupq_m_wb_u8 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <16 x i1>, one
     * call to @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1, store result
     * element 1 back through a, return element 0. */
800 #ifdef POLYMORPHIC
801     return vdwdupq_m(inactive, a, b, 1, p);
802 #else /* POLYMORPHIC */
803     return vdwdupq_m_wb_u8(inactive, a, b, 1, p);
804 #endif /* POLYMORPHIC */
805 }
806 
807 // CHECK-LABEL: @test_vdwdupq_m_wb_u16(
808 // CHECK-NEXT:  entry:
809 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
810 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
811 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
812 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP2]])
813 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
814 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
815 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
816 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
817 //
test_vdwdupq_m_wb_u16(uint16x8_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)818 uint16x8_t test_vdwdupq_m_wb_u16(uint16x8_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
819 {
    /* Codegen test for the merging, pointer-argument form of vdwdupq on
     * 8 x i16 with second scalar b and immediate 4. The overloaded spelling
     * vdwdupq_m is compiled when POLYMORPHIC is defined, the explicit
     * vdwdupq_m_wb_u16 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <8 x i1>, one
     * call to @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1, store result
     * element 1 back through a, return element 0. */
820 #ifdef POLYMORPHIC
821     return vdwdupq_m(inactive, a, b, 4, p);
822 #else /* POLYMORPHIC */
823     return vdwdupq_m_wb_u16(inactive, a, b, 4, p);
824 #endif /* POLYMORPHIC */
825 }
826 
827 // CHECK-LABEL: @test_vdwdupq_m_wb_u32(
828 // CHECK-NEXT:  entry:
829 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
830 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
831 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
832 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> [[INACTIVE:%.*]], i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
833 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
834 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
835 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
836 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
837 //
test_vdwdupq_m_wb_u32(uint32x4_t inactive,uint32_t * a,uint32_t b,mve_pred16_t p)838 uint32x4_t test_vdwdupq_m_wb_u32(uint32x4_t inactive, uint32_t *a, uint32_t b, mve_pred16_t p)
839 {
    /* Codegen test for the merging, pointer-argument form of vdwdupq on
     * 4 x i32 with second scalar b and immediate 4. The overloaded spelling
     * vdwdupq_m is compiled when POLYMORPHIC is defined, the explicit
     * vdwdupq_m_wb_u32 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <4 x i1>, one
     * call to @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1, store result
     * element 1 back through a, return element 0. */
840 #ifdef POLYMORPHIC
841     return vdwdupq_m(inactive, a, b, 4, p);
842 #else /* POLYMORPHIC */
843     return vdwdupq_m_wb_u32(inactive, a, b, 4, p);
844 #endif /* POLYMORPHIC */
845 }
846 
847 // CHECK-LABEL: @test_vidupq_x_n_u8(
848 // CHECK-NEXT:  entry:
849 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
850 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
851 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 2, <16 x i1> [[TMP1]])
852 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
853 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
854 //
test_vidupq_x_n_u8(uint32_t a,mve_pred16_t p)855 uint8x16_t test_vidupq_x_n_u8(uint32_t a, mve_pred16_t p)
856 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of vidupq on 16 x i8 with immediate 2. The spelling vidupq_x_u8 is
     * compiled when POLYMORPHIC is defined, vidupq_x_n_u8 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * convert p to <16 x i1>, one call to
     * @llvm.arm.mve.vidup.predicated.v16i8.v16i1 with an undef first
     * operand, return result element 0. */
857 #ifdef POLYMORPHIC
858     return vidupq_x_u8(a, 2, p);
859 #else /* POLYMORPHIC */
860     return vidupq_x_n_u8(a, 2, p);
861 #endif /* POLYMORPHIC */
862 }
863 
864 // CHECK-LABEL: @test_vidupq_x_n_u16(
865 // CHECK-NEXT:  entry:
866 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
867 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
868 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 2, <8 x i1> [[TMP1]])
869 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
870 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
871 //
test_vidupq_x_n_u16(uint32_t a,mve_pred16_t p)872 uint16x8_t test_vidupq_x_n_u16(uint32_t a, mve_pred16_t p)
873 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of vidupq on 8 x i16 with immediate 2. The spelling vidupq_x_u16 is
     * compiled when POLYMORPHIC is defined, vidupq_x_n_u16 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * convert p to <8 x i1>, one call to
     * @llvm.arm.mve.vidup.predicated.v8i16.v8i1 with an undef first
     * operand, return result element 0. */
874 #ifdef POLYMORPHIC
875     return vidupq_x_u16(a, 2, p);
876 #else /* POLYMORPHIC */
877     return vidupq_x_n_u16(a, 2, p);
878 #endif /* POLYMORPHIC */
879 }
880 
881 // CHECK-LABEL: @test_vidupq_x_n_u32(
882 // CHECK-NEXT:  entry:
883 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
884 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
885 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 8, <4 x i1> [[TMP1]])
886 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
887 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
888 //
test_vidupq_x_n_u32(uint32_t a,mve_pred16_t p)889 uint32x4_t test_vidupq_x_n_u32(uint32_t a, mve_pred16_t p)
890 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of vidupq on 4 x i32 with immediate 8. The spelling vidupq_x_u32 is
     * compiled when POLYMORPHIC is defined, vidupq_x_n_u32 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * convert p to <4 x i1>, one call to
     * @llvm.arm.mve.vidup.predicated.v4i32.v4i1 with an undef first
     * operand, return result element 0. */
891 #ifdef POLYMORPHIC
892     return vidupq_x_u32(a, 8, p);
893 #else /* POLYMORPHIC */
894     return vidupq_x_n_u32(a, 8, p);
895 #endif /* POLYMORPHIC */
896 }
897 
898 // CHECK-LABEL: @test_vddupq_x_n_u8(
899 // CHECK-NEXT:  entry:
900 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
901 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
902 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 8, <16 x i1> [[TMP1]])
903 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
904 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
905 //
test_vddupq_x_n_u8(uint32_t a,mve_pred16_t p)906 uint8x16_t test_vddupq_x_n_u8(uint32_t a, mve_pred16_t p)
907 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of vddupq on 16 x i8 with immediate 8. The spelling vddupq_x_u8 is
     * compiled when POLYMORPHIC is defined, vddupq_x_n_u8 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * convert p to <16 x i1>, one call to
     * @llvm.arm.mve.vddup.predicated.v16i8.v16i1 with an undef first
     * operand, return result element 0. */
908 #ifdef POLYMORPHIC
909     return vddupq_x_u8(a, 8, p);
910 #else /* POLYMORPHIC */
911     return vddupq_x_n_u8(a, 8, p);
912 #endif /* POLYMORPHIC */
913 }
914 
915 // CHECK-LABEL: @test_vddupq_x_n_u16(
916 // CHECK-NEXT:  entry:
917 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
918 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
919 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 4, <8 x i1> [[TMP1]])
920 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
921 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
922 //
test_vddupq_x_n_u16(uint32_t a,mve_pred16_t p)923 uint16x8_t test_vddupq_x_n_u16(uint32_t a, mve_pred16_t p)
924 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of vddupq on 8 x i16 with immediate 4. The spelling vddupq_x_u16 is
     * compiled when POLYMORPHIC is defined, vddupq_x_n_u16 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * convert p to <8 x i1>, one call to
     * @llvm.arm.mve.vddup.predicated.v8i16.v8i1 with an undef first
     * operand, return result element 0. */
925 #ifdef POLYMORPHIC
926     return vddupq_x_u16(a, 4, p);
927 #else /* POLYMORPHIC */
928     return vddupq_x_n_u16(a, 4, p);
929 #endif /* POLYMORPHIC */
930 }
931 
932 // CHECK-LABEL: @test_vddupq_x_n_u32(
933 // CHECK-NEXT:  entry:
934 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
935 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
936 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 2, <4 x i1> [[TMP1]])
937 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
938 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
939 //
test_vddupq_x_n_u32(uint32_t a,mve_pred16_t p)940 uint32x4_t test_vddupq_x_n_u32(uint32_t a, mve_pred16_t p)
941 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of vddupq on 4 x i32 with immediate 2. The spelling vddupq_x_u32 is
     * compiled when POLYMORPHIC is defined, vddupq_x_n_u32 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * convert p to <4 x i1>, one call to
     * @llvm.arm.mve.vddup.predicated.v4i32.v4i1 with an undef first
     * operand, return result element 0. */
942 #ifdef POLYMORPHIC
943     return vddupq_x_u32(a, 2, p);
944 #else /* POLYMORPHIC */
945     return vddupq_x_n_u32(a, 2, p);
946 #endif /* POLYMORPHIC */
947 }
948 
949 // CHECK-LABEL: @test_viwdupq_x_n_u8(
950 // CHECK-NEXT:  entry:
951 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
952 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
953 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <16 x i1> [[TMP1]])
954 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
955 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
956 //
test_viwdupq_x_n_u8(uint32_t a,uint32_t b,mve_pred16_t p)957 uint8x16_t test_viwdupq_x_n_u8(uint32_t a, uint32_t b, mve_pred16_t p)
958 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of viwdupq on 16 x i8 with second scalar b and immediate 2. The
     * spelling viwdupq_x_u8 is compiled when POLYMORPHIC is defined,
     * viwdupq_x_n_u8 otherwise. The FileCheck assertions before this
     * function apply to both builds: convert p to <16 x i1>, one call to
     * @llvm.arm.mve.viwdup.predicated.v16i8.v16i1 with an undef first
     * operand, return result element 0. */
959 #ifdef POLYMORPHIC
960     return viwdupq_x_u8(a, b, 2, p);
961 #else /* POLYMORPHIC */
962     return viwdupq_x_n_u8(a, b, 2, p);
963 #endif /* POLYMORPHIC */
964 }
965 
966 // CHECK-LABEL: @test_viwdupq_x_n_u16(
967 // CHECK-NEXT:  entry:
968 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
969 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
970 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP1]])
971 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
972 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
973 //
test_viwdupq_x_n_u16(uint32_t a,uint32_t b,mve_pred16_t p)974 uint16x8_t test_viwdupq_x_n_u16(uint32_t a, uint32_t b, mve_pred16_t p)
975 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of viwdupq on 8 x i16 with second scalar b and immediate 4. The
     * spelling viwdupq_x_u16 is compiled when POLYMORPHIC is defined,
     * viwdupq_x_n_u16 otherwise. The FileCheck assertions before this
     * function apply to both builds: convert p to <8 x i1>, one call to
     * @llvm.arm.mve.viwdup.predicated.v8i16.v8i1 with an undef first
     * operand, return result element 0. */
976 #ifdef POLYMORPHIC
977     return viwdupq_x_u16(a, b, 4, p);
978 #else /* POLYMORPHIC */
979     return viwdupq_x_n_u16(a, b, 4, p);
980 #endif /* POLYMORPHIC */
981 }
982 
983 // CHECK-LABEL: @test_viwdupq_x_n_u32(
984 // CHECK-NEXT:  entry:
985 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
986 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
987 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <4 x i1> [[TMP1]])
988 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
989 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
990 //
test_viwdupq_x_n_u32(uint32_t a,uint32_t b,mve_pred16_t p)991 uint32x4_t test_viwdupq_x_n_u32(uint32_t a, uint32_t b, mve_pred16_t p)
992 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of viwdupq on 4 x i32 with second scalar b and immediate 2. The
     * spelling viwdupq_x_u32 is compiled when POLYMORPHIC is defined,
     * viwdupq_x_n_u32 otherwise. The FileCheck assertions before this
     * function apply to both builds: convert p to <4 x i1>, one call to
     * @llvm.arm.mve.viwdup.predicated.v4i32.v4i1 with an undef first
     * operand, return result element 0. */
993 #ifdef POLYMORPHIC
994     return viwdupq_x_u32(a, b, 2, p);
995 #else /* POLYMORPHIC */
996     return viwdupq_x_n_u32(a, b, 2, p);
997 #endif /* POLYMORPHIC */
998 }
999 
1000 // CHECK-LABEL: @test_vdwdupq_x_n_u8(
1001 // CHECK-NEXT:  entry:
1002 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1003 // CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
1004 // CHECK-NEXT:    [[TMP2:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <16 x i1> [[TMP1]])
1005 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP2]], 0
1006 // CHECK-NEXT:    ret <16 x i8> [[TMP3]]
1007 //
test_vdwdupq_x_n_u8(uint32_t a,uint32_t b,mve_pred16_t p)1008 uint8x16_t test_vdwdupq_x_n_u8(uint32_t a, uint32_t b, mve_pred16_t p)
1009 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of vdwdupq on 16 x i8 with second scalar b and immediate 2. The
     * spelling vdwdupq_x_u8 is compiled when POLYMORPHIC is defined,
     * vdwdupq_x_n_u8 otherwise. The FileCheck assertions before this
     * function apply to both builds: convert p to <16 x i1>, one call to
     * @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1 with an undef first
     * operand, return result element 0. */
1010 #ifdef POLYMORPHIC
1011     return vdwdupq_x_u8(a, b, 2, p);
1012 #else /* POLYMORPHIC */
1013     return vdwdupq_x_n_u8(a, b, 2, p);
1014 #endif /* POLYMORPHIC */
1015 }
1016 
1017 // CHECK-LABEL: @test_vdwdupq_x_n_u16(
1018 // CHECK-NEXT:  entry:
1019 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1020 // CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
1021 // CHECK-NEXT:    [[TMP2:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP1]])
1022 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP2]], 0
1023 // CHECK-NEXT:    ret <8 x i16> [[TMP3]]
1024 //
test_vdwdupq_x_n_u16(uint32_t a,uint32_t b,mve_pred16_t p)1025 uint16x8_t test_vdwdupq_x_n_u16(uint32_t a, uint32_t b, mve_pred16_t p)
1026 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of vdwdupq on 8 x i16 with second scalar b and immediate 2. The
     * spelling vdwdupq_x_u16 is compiled when POLYMORPHIC is defined,
     * vdwdupq_x_n_u16 otherwise. The FileCheck assertions before this
     * function apply to both builds: convert p to <8 x i1>, one call to
     * @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1 with an undef first
     * operand, return result element 0. */
1027 #ifdef POLYMORPHIC
1028     return vdwdupq_x_u16(a, b, 2, p);
1029 #else /* POLYMORPHIC */
1030     return vdwdupq_x_n_u16(a, b, 2, p);
1031 #endif /* POLYMORPHIC */
1032 }
1033 
1034 // CHECK-LABEL: @test_vdwdupq_x_n_u32(
1035 // CHECK-NEXT:  entry:
1036 // CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
1037 // CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
1038 // CHECK-NEXT:    [[TMP2:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[A:%.*]], i32 [[B:%.*]], i32 8, <4 x i1> [[TMP1]])
1039 // CHECK-NEXT:    [[TMP3:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP2]], 0
1040 // CHECK-NEXT:    ret <4 x i32> [[TMP3]]
1041 //
test_vdwdupq_x_n_u32(uint32_t a,uint32_t b,mve_pred16_t p)1042 uint32x4_t test_vdwdupq_x_n_u32(uint32_t a, uint32_t b, mve_pred16_t p)
1043 {
    /* Codegen test for the _x (no inactive argument), scalar-argument form
     * of vdwdupq on 4 x i32 with second scalar b and immediate 8. The
     * spelling vdwdupq_x_u32 is compiled when POLYMORPHIC is defined,
     * vdwdupq_x_n_u32 otherwise. The FileCheck assertions before this
     * function apply to both builds: convert p to <4 x i1>, one call to
     * @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1 with an undef first
     * operand, return result element 0. */
1044 #ifdef POLYMORPHIC
1045     return vdwdupq_x_u32(a, b, 8, p);
1046 #else /* POLYMORPHIC */
1047     return vdwdupq_x_n_u32(a, b, 8, p);
1048 #endif /* POLYMORPHIC */
1049 }
1050 
1051 // CHECK-LABEL: @test_vidupq_x_wb_u8(
1052 // CHECK-NEXT:  entry:
1053 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1054 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1055 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1056 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vidup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 2, <16 x i1> [[TMP2]])
1057 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1058 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1059 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1060 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
1061 //
test_vidupq_x_wb_u8(uint32_t * a,mve_pred16_t p)1062 uint8x16_t test_vidupq_x_wb_u8(uint32_t *a, mve_pred16_t p)
1063 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of vidupq on 16 x i8 with immediate 2. The spelling vidupq_x_u8 is
     * compiled when POLYMORPHIC is defined, vidupq_x_wb_u8 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * load *a, convert p to <16 x i1>, one call to
     * @llvm.arm.mve.vidup.predicated.v16i8.v16i1 with an undef first
     * operand, store result element 1 back through a, return element 0. */
1064 #ifdef POLYMORPHIC
1065     return vidupq_x_u8(a, 2, p);
1066 #else /* POLYMORPHIC */
1067     return vidupq_x_wb_u8(a, 2, p);
1068 #endif /* POLYMORPHIC */
1069 }
1070 
1071 // CHECK-LABEL: @test_vidupq_x_wb_u16(
1072 // CHECK-NEXT:  entry:
1073 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1074 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1075 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1076 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vidup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 4, <8 x i1> [[TMP2]])
1077 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1078 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1079 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1080 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
1081 //
test_vidupq_x_wb_u16(uint32_t * a,mve_pred16_t p)1082 uint16x8_t test_vidupq_x_wb_u16(uint32_t *a, mve_pred16_t p)
1083 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of vidupq on 8 x i16 with immediate 4. The spelling vidupq_x_u16 is
     * compiled when POLYMORPHIC is defined, vidupq_x_wb_u16 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * load *a, convert p to <8 x i1>, one call to
     * @llvm.arm.mve.vidup.predicated.v8i16.v8i1 with an undef first
     * operand, store result element 1 back through a, return element 0. */
1084 #ifdef POLYMORPHIC
1085     return vidupq_x_u16(a, 4, p);
1086 #else /* POLYMORPHIC */
1087     return vidupq_x_wb_u16(a, 4, p);
1088 #endif /* POLYMORPHIC */
1089 }
1090 
1091 // CHECK-LABEL: @test_vidupq_x_wb_u32(
1092 // CHECK-NEXT:  entry:
1093 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1094 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1095 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1096 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vidup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 2, <4 x i1> [[TMP2]])
1097 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1098 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1099 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1100 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
1101 //
test_vidupq_x_wb_u32(uint32_t * a,mve_pred16_t p)1102 uint32x4_t test_vidupq_x_wb_u32(uint32_t *a, mve_pred16_t p)
1103 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of vidupq on 4 x i32 with immediate 2. The spelling vidupq_x_u32 is
     * compiled when POLYMORPHIC is defined, vidupq_x_wb_u32 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * load *a, convert p to <4 x i1>, one call to
     * @llvm.arm.mve.vidup.predicated.v4i32.v4i1 with an undef first
     * operand, store result element 1 back through a, return element 0. */
1104 #ifdef POLYMORPHIC
1105     return vidupq_x_u32(a, 2, p);
1106 #else /* POLYMORPHIC */
1107     return vidupq_x_wb_u32(a, 2, p);
1108 #endif /* POLYMORPHIC */
1109 }
1110 
1111 // CHECK-LABEL: @test_vddupq_x_wb_u8(
1112 // CHECK-NEXT:  entry:
1113 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1114 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1115 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1116 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vddup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 1, <16 x i1> [[TMP2]])
1117 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1118 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1119 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1120 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
1121 //
test_vddupq_x_wb_u8(uint32_t * a,mve_pred16_t p)1122 uint8x16_t test_vddupq_x_wb_u8(uint32_t *a, mve_pred16_t p)
1123 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of vddupq on 16 x i8 with immediate 1. The spelling vddupq_x_u8 is
     * compiled when POLYMORPHIC is defined, vddupq_x_wb_u8 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * load *a, convert p to <16 x i1>, one call to
     * @llvm.arm.mve.vddup.predicated.v16i8.v16i1 with an undef first
     * operand, store result element 1 back through a, return element 0. */
1124 #ifdef POLYMORPHIC
1125     return vddupq_x_u8(a, 1, p);
1126 #else /* POLYMORPHIC */
1127     return vddupq_x_wb_u8(a, 1, p);
1128 #endif /* POLYMORPHIC */
1129 }
1130 
1131 // CHECK-LABEL: @test_vddupq_x_wb_u16(
1132 // CHECK-NEXT:  entry:
1133 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1134 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1135 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1136 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vddup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 4, <8 x i1> [[TMP2]])
1137 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1138 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1139 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1140 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
1141 //
test_vddupq_x_wb_u16(uint32_t * a,mve_pred16_t p)1142 uint16x8_t test_vddupq_x_wb_u16(uint32_t *a, mve_pred16_t p)
1143 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of vddupq on 8 x i16 with immediate 4. The spelling vddupq_x_u16 is
     * compiled when POLYMORPHIC is defined, vddupq_x_wb_u16 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * load *a, convert p to <8 x i1>, one call to
     * @llvm.arm.mve.vddup.predicated.v8i16.v8i1 with an undef first
     * operand, store result element 1 back through a, return element 0. */
1144 #ifdef POLYMORPHIC
1145     return vddupq_x_u16(a, 4, p);
1146 #else /* POLYMORPHIC */
1147     return vddupq_x_wb_u16(a, 4, p);
1148 #endif /* POLYMORPHIC */
1149 }
1150 
1151 // CHECK-LABEL: @test_vddupq_x_wb_u32(
1152 // CHECK-NEXT:  entry:
1153 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1154 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1155 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1156 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vddup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 4, <4 x i1> [[TMP2]])
1157 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1158 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1159 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1160 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
1161 //
test_vddupq_x_wb_u32(uint32_t * a,mve_pred16_t p)1162 uint32x4_t test_vddupq_x_wb_u32(uint32_t *a, mve_pred16_t p)
1163 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of vddupq on 4 x i32 with immediate 4. The spelling vddupq_x_u32 is
     * compiled when POLYMORPHIC is defined, vddupq_x_wb_u32 otherwise. The
     * FileCheck assertions before this function apply to both builds:
     * load *a, convert p to <4 x i1>, one call to
     * @llvm.arm.mve.vddup.predicated.v4i32.v4i1 with an undef first
     * operand, store result element 1 back through a, return element 0. */
1164 #ifdef POLYMORPHIC
1165     return vddupq_x_u32(a, 4, p);
1166 #else /* POLYMORPHIC */
1167     return vddupq_x_wb_u32(a, 4, p);
1168 #endif /* POLYMORPHIC */
1169 }
1170 
1171 // CHECK-LABEL: @test_viwdupq_x_wb_u8(
1172 // CHECK-NEXT:  entry:
1173 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1174 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1175 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1176 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.viwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 1, <16 x i1> [[TMP2]])
1177 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1178 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1179 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1180 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
1181 //
test_viwdupq_x_wb_u8(uint32_t * a,uint32_t b,mve_pred16_t p)1182 uint8x16_t test_viwdupq_x_wb_u8(uint32_t *a, uint32_t b, mve_pred16_t p)
1183 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of viwdupq on 16 x i8 with second scalar b and immediate 1. The
     * spelling viwdupq_x_u8 is compiled when POLYMORPHIC is defined,
     * viwdupq_x_wb_u8 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <16 x i1>, one
     * call to @llvm.arm.mve.viwdup.predicated.v16i8.v16i1 with an undef
     * first operand, store result element 1 back through a, return
     * element 0. */
1184 #ifdef POLYMORPHIC
1185     return viwdupq_x_u8(a, b, 1, p);
1186 #else /* POLYMORPHIC */
1187     return viwdupq_x_wb_u8(a, b, 1, p);
1188 #endif /* POLYMORPHIC */
1189 }
1190 
1191 // CHECK-LABEL: @test_viwdupq_x_wb_u16(
1192 // CHECK-NEXT:  entry:
1193 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1194 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1195 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1196 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.viwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 2, <8 x i1> [[TMP2]])
1197 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1198 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1199 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1200 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
1201 //
test_viwdupq_x_wb_u16(uint32_t * a,uint32_t b,mve_pred16_t p)1202 uint16x8_t test_viwdupq_x_wb_u16(uint32_t *a, uint32_t b, mve_pred16_t p)
1203 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of viwdupq on 8 x i16 with second scalar b and immediate 2. The
     * spelling viwdupq_x_u16 is compiled when POLYMORPHIC is defined,
     * viwdupq_x_wb_u16 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <8 x i1>, one
     * call to @llvm.arm.mve.viwdup.predicated.v8i16.v8i1 with an undef
     * first operand, store result element 1 back through a, return
     * element 0. */
1204 #ifdef POLYMORPHIC
1205     return viwdupq_x_u16(a, b, 2, p);
1206 #else /* POLYMORPHIC */
1207     return viwdupq_x_wb_u16(a, b, 2, p);
1208 #endif /* POLYMORPHIC */
1209 }
1210 
1211 // CHECK-LABEL: @test_viwdupq_x_wb_u32(
1212 // CHECK-NEXT:  entry:
1213 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1214 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1215 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1216 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.viwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 1, <4 x i1> [[TMP2]])
1217 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1218 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1219 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1220 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
1221 //
test_viwdupq_x_wb_u32(uint32_t * a,uint32_t b,mve_pred16_t p)1222 uint32x4_t test_viwdupq_x_wb_u32(uint32_t *a, uint32_t b, mve_pred16_t p)
1223 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of viwdupq on 4 x i32 with second scalar b and immediate 1. The
     * spelling viwdupq_x_u32 is compiled when POLYMORPHIC is defined,
     * viwdupq_x_wb_u32 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <4 x i1>, one
     * call to @llvm.arm.mve.viwdup.predicated.v4i32.v4i1 with an undef
     * first operand, store result element 1 back through a, return
     * element 0. */
1224 #ifdef POLYMORPHIC
1225     return viwdupq_x_u32(a, b, 1, p);
1226 #else /* POLYMORPHIC */
1227     return viwdupq_x_wb_u32(a, b, 1, p);
1228 #endif /* POLYMORPHIC */
1229 }
1230 
1231 // CHECK-LABEL: @test_vdwdupq_x_wb_u8(
1232 // CHECK-NEXT:  entry:
1233 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1234 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1235 // CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP1]])
1236 // CHECK-NEXT:    [[TMP3:%.*]] = call { <16 x i8>, i32 } @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1(<16 x i8> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <16 x i1> [[TMP2]])
1237 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 1
1238 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1239 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <16 x i8>, i32 } [[TMP3]], 0
1240 // CHECK-NEXT:    ret <16 x i8> [[TMP5]]
1241 //
test_vdwdupq_x_wb_u8(uint32_t * a,uint32_t b,mve_pred16_t p)1242 uint8x16_t test_vdwdupq_x_wb_u8(uint32_t *a, uint32_t b, mve_pred16_t p)
1243 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of vdwdupq on 16 x i8 with second scalar b and immediate 4. The
     * spelling vdwdupq_x_u8 is compiled when POLYMORPHIC is defined,
     * vdwdupq_x_wb_u8 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <16 x i1>, one
     * call to @llvm.arm.mve.vdwdup.predicated.v16i8.v16i1 with an undef
     * first operand, store result element 1 back through a, return
     * element 0. */
1244 #ifdef POLYMORPHIC
1245     return vdwdupq_x_u8(a, b, 4, p);
1246 #else /* POLYMORPHIC */
1247     return vdwdupq_x_wb_u8(a, b, 4, p);
1248 #endif /* POLYMORPHIC */
1249 }
1250 
1251 // CHECK-LABEL: @test_vdwdupq_x_wb_u16(
1252 // CHECK-NEXT:  entry:
1253 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1254 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1255 // CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP1]])
1256 // CHECK-NEXT:    [[TMP3:%.*]] = call { <8 x i16>, i32 } @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1(<8 x i16> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <8 x i1> [[TMP2]])
1257 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 1
1258 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1259 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <8 x i16>, i32 } [[TMP3]], 0
1260 // CHECK-NEXT:    ret <8 x i16> [[TMP5]]
1261 //
test_vdwdupq_x_wb_u16(uint32_t * a,uint32_t b,mve_pred16_t p)1262 uint16x8_t test_vdwdupq_x_wb_u16(uint32_t *a, uint32_t b, mve_pred16_t p)
1263 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of vdwdupq on 8 x i16 with second scalar b and immediate 4. The
     * spelling vdwdupq_x_u16 is compiled when POLYMORPHIC is defined,
     * vdwdupq_x_wb_u16 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <8 x i1>, one
     * call to @llvm.arm.mve.vdwdup.predicated.v8i16.v8i1 with an undef
     * first operand, store result element 1 back through a, return
     * element 0. */
1264 #ifdef POLYMORPHIC
1265     return vdwdupq_x_u16(a, b, 4, p);
1266 #else /* POLYMORPHIC */
1267     return vdwdupq_x_wb_u16(a, b, 4, p);
1268 #endif /* POLYMORPHIC */
1269 }
1270 
1271 // CHECK-LABEL: @test_vdwdupq_x_wb_u32(
1272 // CHECK-NEXT:  entry:
1273 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
1274 // CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[P:%.*]] to i32
1275 // CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP1]])
1276 // CHECK-NEXT:    [[TMP3:%.*]] = call { <4 x i32>, i32 } @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1(<4 x i32> undef, i32 [[TMP0]], i32 [[B:%.*]], i32 4, <4 x i1> [[TMP2]])
1277 // CHECK-NEXT:    [[TMP4:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 1
1278 // CHECK-NEXT:    store i32 [[TMP4]], i32* [[A]], align 4
1279 // CHECK-NEXT:    [[TMP5:%.*]] = extractvalue { <4 x i32>, i32 } [[TMP3]], 0
1280 // CHECK-NEXT:    ret <4 x i32> [[TMP5]]
1281 //
test_vdwdupq_x_wb_u32(uint32_t * a,uint32_t b,mve_pred16_t p)1282 uint32x4_t test_vdwdupq_x_wb_u32(uint32_t *a, uint32_t b, mve_pred16_t p)
1283 {
    /* Codegen test for the _x (no inactive argument), pointer-argument form
     * of vdwdupq on 4 x i32 with second scalar b and immediate 4. The
     * spelling vdwdupq_x_u32 is compiled when POLYMORPHIC is defined,
     * vdwdupq_x_wb_u32 otherwise. The FileCheck assertions before this
     * function apply to both builds: load *a, convert p to <4 x i1>, one
     * call to @llvm.arm.mve.vdwdup.predicated.v4i32.v4i1 with an undef
     * first operand, store result element 1 back through a, return
     * element 0. */
1284 #ifdef POLYMORPHIC
1285     return vdwdupq_x_u32(a, b, 4, p);
1286 #else /* POLYMORPHIC */
1287     return vdwdupq_x_wb_u32(a, b, 4, p);
1288 #endif /* POLYMORPHIC */
1289 }
1290