1; Test vector insertion of memory values.
2;
3; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
4
5; Test v16i8 insertion into the first element.
6define <16 x i8> @f1(<16 x i8> %val, i8 *%ptr) {
7; CHECK-LABEL: f1:
8; CHECK: vleb %v24, 0(%r2), 0
9; CHECK: br %r14
; Load one i8 from %ptr and insert it at constant index 0 of %val.  The
; checks above expect this to come out as vleb on 0(%r2) with element 0.
10  %element = load i8, i8 *%ptr
11  %ret = insertelement <16 x i8> %val, i8 %element, i32 0
12  ret <16 x i8> %ret
13}
14
15; Test v16i8 insertion into the last element.
16define <16 x i8> @f2(<16 x i8> %val, i8 *%ptr) {
17; CHECK-LABEL: f2:
18; CHECK: vleb %v24, 0(%r2), 15
19; CHECK: br %r14
; Same load-and-insert shape as f1, but the constant index is 15, the last
; lane of a <16 x i8>.  The checks expect vleb on 0(%r2) with element 15.
20  %element = load i8, i8 *%ptr
21  %ret = insertelement <16 x i8> %val, i8 %element, i32 15
22  ret <16 x i8> %ret
23}
24
25; Test v16i8 insertion with the highest in-range offset.
26define <16 x i8> @f3(<16 x i8> %val, i8 *%base) {
27; CHECK-LABEL: f3:
28; CHECK: vleb %v24, 4095(%r2), 10
29; CHECK: br %r14
; %ptr is %base advanced by 4095 i8 elements, i.e. 4095 bytes.  The checks
; expect that offset to appear as the vleb displacement, 4095(%r2), with
; no separate address arithmetic being required by the test.
30  %ptr = getelementptr i8, i8 *%base, i32 4095
31  %element = load i8, i8 *%ptr
32  %ret = insertelement <16 x i8> %val, i8 %element, i32 10
33  ret <16 x i8> %ret
34}
35
36; Test v16i8 insertion with the first out-of-range offset.
37define <16 x i8> @f4(<16 x i8> %val, i8 *%base) {
38; CHECK-LABEL: f4:
39; CHECK: aghi %r2, 4096
40; CHECK: vleb %v24, 0(%r2), 5
41; CHECK: br %r14
; %ptr is %base advanced by 4096 bytes, one past the offset used in f3.
; The checks expect the offset to be added to %r2 with aghi first, and the
; vleb to then address 0(%r2).
42  %ptr = getelementptr i8, i8 *%base, i32 4096
43  %element = load i8, i8 *%ptr
44  %ret = insertelement <16 x i8> %val, i8 %element, i32 5
45  ret <16 x i8> %ret
46}
47
48; Test v16i8 insertion into a variable element.
49define <16 x i8> @f5(<16 x i8> %val, i8 *%ptr, i32 %index) {
50; CHECK-LABEL: f5:
51; CHECK-NOT: vleb
52; CHECK: br %r14
; The insertion index is the run-time argument %index rather than a
; constant.  The checks only require that no vleb appears between the f5
; label and the br %r14 return; they do not pin the replacement sequence.
53  %element = load i8, i8 *%ptr
54  %ret = insertelement <16 x i8> %val, i8 %element, i32 %index
55  ret <16 x i8> %ret
56}
57
58; Test v8i16 insertion into the first element.
59define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
60; CHECK-LABEL: f6:
61; CHECK: vleh %v24, 0(%r2), 0
62; CHECK: br %r14
; Load one i16 from %ptr and insert it at constant index 0 of %val.  The
; checks expect vleh on 0(%r2) with element 0.
63  %element = load i16, i16 *%ptr
64  %ret = insertelement <8 x i16> %val, i16 %element, i32 0
65  ret <8 x i16> %ret
66}
67
68; Test v8i16 insertion into the last element.
69define <8 x i16> @f7(<8 x i16> %val, i16 *%ptr) {
70; CHECK-LABEL: f7:
71; CHECK: vleh %v24, 0(%r2), 7
72; CHECK: br %r14
; Same shape as f6, but the constant index is 7, the last lane of an
; <8 x i16>.  The checks expect vleh on 0(%r2) with element 7.
73  %element = load i16, i16 *%ptr
74  %ret = insertelement <8 x i16> %val, i16 %element, i32 7
75  ret <8 x i16> %ret
76}
77
78; Test v8i16 insertion with the highest in-range offset.
79define <8 x i16> @f8(<8 x i16> %val, i16 *%base) {
80; CHECK-LABEL: f8:
81; CHECK: vleh %v24, 4094(%r2), 5
82; CHECK: br %r14
; %ptr is %base advanced by 2047 i16 elements, i.e. 2047 * 2 = 4094 bytes.
; The checks expect that byte offset as the vleh displacement, 4094(%r2).
83  %ptr = getelementptr i16, i16 *%base, i32 2047
84  %element = load i16, i16 *%ptr
85  %ret = insertelement <8 x i16> %val, i16 %element, i32 5
86  ret <8 x i16> %ret
87}
88
89; Test v8i16 insertion with the first out-of-range offset.
90define <8 x i16> @f9(<8 x i16> %val, i16 *%base) {
91; CHECK-LABEL: f9:
92; CHECK: aghi %r2, 4096
93; CHECK: vleh %v24, 0(%r2), 1
94; CHECK: br %r14
; %ptr is %base advanced by 2048 i16 elements, i.e. 2048 * 2 = 4096 bytes,
; one element past the offset used in f8.  The checks expect aghi to add
; 4096 to %r2 and the vleh to then address 0(%r2).
95  %ptr = getelementptr i16, i16 *%base, i32 2048
96  %element = load i16, i16 *%ptr
97  %ret = insertelement <8 x i16> %val, i16 %element, i32 1
98  ret <8 x i16> %ret
99}
100
101; Test v8i16 insertion into a variable element.
102define <8 x i16> @f10(<8 x i16> %val, i16 *%ptr, i32 %index) {
103; CHECK-LABEL: f10:
104; CHECK-NOT: vleh
105; CHECK: br %r14
; The insertion index is the run-time argument %index rather than a
; constant.  The checks only require that no vleh appears between the f10
; label and the br %r14 return.
106  %element = load i16, i16 *%ptr
107  %ret = insertelement <8 x i16> %val, i16 %element, i32 %index
108  ret <8 x i16> %ret
109}
110
111; Test v4i32 insertion into the first element.
112define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr) {
113; CHECK-LABEL: f11:
114; CHECK: vlef %v24, 0(%r2), 0
115; CHECK: br %r14
; Load one i32 from %ptr and insert it at constant index 0 of %val.  The
; checks expect vlef on 0(%r2) with element 0.
116  %element = load i32, i32 *%ptr
117  %ret = insertelement <4 x i32> %val, i32 %element, i32 0
118  ret <4 x i32> %ret
119}
120
121; Test v4i32 insertion into the last element.
122define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
123; CHECK-LABEL: f12:
124; CHECK: vlef %v24, 0(%r2), 3
125; CHECK: br %r14
; Same shape as f11, but the constant index is 3, the last lane of a
; <4 x i32>.  The checks expect vlef on 0(%r2) with element 3.
126  %element = load i32, i32 *%ptr
127  %ret = insertelement <4 x i32> %val, i32 %element, i32 3
128  ret <4 x i32> %ret
129}
130
131; Test v4i32 insertion with the highest in-range offset.
132define <4 x i32> @f13(<4 x i32> %val, i32 *%base) {
133; CHECK-LABEL: f13:
134; CHECK: vlef %v24, 4092(%r2), 2
135; CHECK: br %r14
; %ptr is %base advanced by 1023 i32 elements, i.e. 1023 * 4 = 4092 bytes.
; The checks expect that byte offset as the vlef displacement, 4092(%r2).
136  %ptr = getelementptr i32, i32 *%base, i32 1023
137  %element = load i32, i32 *%ptr
138  %ret = insertelement <4 x i32> %val, i32 %element, i32 2
139  ret <4 x i32> %ret
140}
141
142; Test v4i32 insertion with the first out-of-range offset.
143define <4 x i32> @f14(<4 x i32> %val, i32 *%base) {
144; CHECK-LABEL: f14:
145; CHECK: aghi %r2, 4096
146; CHECK: vlef %v24, 0(%r2), 1
147; CHECK: br %r14
; %ptr is %base advanced by 1024 i32 elements, i.e. 1024 * 4 = 4096 bytes,
; one element past the offset used in f13.  The checks expect aghi to add
; 4096 to %r2 and the vlef to then address 0(%r2).
148  %ptr = getelementptr i32, i32 *%base, i32 1024
149  %element = load i32, i32 *%ptr
150  %ret = insertelement <4 x i32> %val, i32 %element, i32 1
151  ret <4 x i32> %ret
152}
153
154; Test v4i32 insertion into a variable element.
155define <4 x i32> @f15(<4 x i32> %val, i32 *%ptr, i32 %index) {
156; CHECK-LABEL: f15:
157; CHECK-NOT: vlef
158; CHECK: br %r14
; The insertion index is the run-time argument %index rather than a
; constant.  The checks only require that no vlef appears between the f15
; label and the br %r14 return.
159  %element = load i32, i32 *%ptr
160  %ret = insertelement <4 x i32> %val, i32 %element, i32 %index
161  ret <4 x i32> %ret
162}
163
164; Test v2i64 insertion into the first element.
165define <2 x i64> @f16(<2 x i64> %val, i64 *%ptr) {
166; CHECK-LABEL: f16:
167; CHECK: vleg %v24, 0(%r2), 0
168; CHECK: br %r14
; Load one i64 from %ptr and insert it at constant index 0 of %val.  The
; checks expect vleg on 0(%r2) with element 0.
169  %element = load i64, i64 *%ptr
170  %ret = insertelement <2 x i64> %val, i64 %element, i32 0
171  ret <2 x i64> %ret
172}
173
174; Test v2i64 insertion into the last element.
175define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr) {
176; CHECK-LABEL: f17:
177; CHECK: vleg %v24, 0(%r2), 1
178; CHECK: br %r14
; Same shape as f16, but the constant index is 1, the last lane of a
; <2 x i64>.  The checks expect vleg on 0(%r2) with element 1.
179  %element = load i64, i64 *%ptr
180  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
181  ret <2 x i64> %ret
182}
183
184; Test v2i64 insertion with the highest in-range offset.
185define <2 x i64> @f18(<2 x i64> %val, i64 *%base) {
186; CHECK-LABEL: f18:
187; CHECK: vleg %v24, 4088(%r2), 1
188; CHECK: br %r14
; %ptr is %base advanced by 511 i64 elements, i.e. 511 * 8 = 4088 bytes.
; The checks expect that byte offset as the vleg displacement, 4088(%r2).
189  %ptr = getelementptr i64, i64 *%base, i32 511
190  %element = load i64, i64 *%ptr
191  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
192  ret <2 x i64> %ret
193}
194
195; Test v2i64 insertion with the first out-of-range offset.
196define <2 x i64> @f19(<2 x i64> %val, i64 *%base) {
197; CHECK-LABEL: f19:
198; CHECK: aghi %r2, 4096
199; CHECK: vleg %v24, 0(%r2), 0
200; CHECK: br %r14
; %ptr is %base advanced by 512 i64 elements, i.e. 512 * 8 = 4096 bytes,
; one element past the offset used in f18.  The checks expect aghi to add
; 4096 to %r2 and the vleg to then address 0(%r2).
201  %ptr = getelementptr i64, i64 *%base, i32 512
202  %element = load i64, i64 *%ptr
203  %ret = insertelement <2 x i64> %val, i64 %element, i32 0
204  ret <2 x i64> %ret
205}
206
207; Test v2i64 insertion into a variable element.
208define <2 x i64> @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
209; CHECK-LABEL: f20:
210; CHECK-NOT: vleg
211; CHECK: br %r14
; The insertion index is the run-time argument %index rather than a
; constant.  The checks only require that no vleg appears between the f20
; label and the br %r14 return.
212  %element = load i64, i64 *%ptr
213  %ret = insertelement <2 x i64> %val, i64 %element, i32 %index
214  ret <2 x i64> %ret
215}
216
217; Test v4f32 insertion into the first element.
218define <4 x float> @f21(<4 x float> %val, float *%ptr) {
219; CHECK-LABEL: f21:
220; CHECK: vlef %v24, 0(%r2), 0
221; CHECK: br %r14
; Load one float from %ptr and insert it at constant index 0 of %val.  The
; checks expect vlef on 0(%r2) with element 0, the same instruction that
; the <4 x i32> test f11 expects.
222  %element = load float, float *%ptr
223  %ret = insertelement <4 x float> %val, float %element, i32 0
224  ret <4 x float> %ret
225}
226
227; Test v4f32 insertion into the last element.
228define <4 x float> @f22(<4 x float> %val, float *%ptr) {
229; CHECK-LABEL: f22:
230; CHECK: vlef %v24, 0(%r2), 3
231; CHECK: br %r14
; Same shape as f21, but the constant index is 3, the last lane of a
; <4 x float>.  The checks expect vlef on 0(%r2) with element 3.
232  %element = load float, float *%ptr
233  %ret = insertelement <4 x float> %val, float %element, i32 3
234  ret <4 x float> %ret
235}
236
237; Test v4f32 insertion with the highest in-range offset.
238define <4 x float> @f23(<4 x float> %val, float *%base) {
239; CHECK-LABEL: f23:
240; CHECK: vlef %v24, 4092(%r2), 2
241; CHECK: br %r14
; %ptr is %base advanced by 1023 float elements; the expected displacement
; of 4092 corresponds to 1023 * 4 bytes.  The checks expect that offset to
; appear directly in the vlef address, 4092(%r2).
242  %ptr = getelementptr float, float *%base, i32 1023
243  %element = load float, float *%ptr
244  %ret = insertelement <4 x float> %val, float %element, i32 2
245  ret <4 x float> %ret
246}
247
248; Test v4f32 insertion with the first out-of-range offset.
249define <4 x float> @f24(<4 x float> %val, float *%base) {
250; CHECK-LABEL: f24:
251; CHECK: aghi %r2, 4096
252; CHECK: vlef %v24, 0(%r2), 1
253; CHECK: br %r14
; %ptr is %base advanced by 1024 float elements, one element past the
; offset used in f23; the expected aghi immediate of 4096 corresponds to
; 1024 * 4 bytes.  The vlef is then expected to address 0(%r2).
254  %ptr = getelementptr float, float *%base, i32 1024
255  %element = load float, float *%ptr
256  %ret = insertelement <4 x float> %val, float %element, i32 1
257  ret <4 x float> %ret
258}
259
260; Test v4f32 insertion into a variable element.
261define <4 x float> @f25(<4 x float> %val, float *%ptr, i32 %index) {
262; CHECK-LABEL: f25:
263; CHECK-NOT: vlef
264; CHECK: br %r14
; The insertion index is the run-time argument %index rather than a
; constant.  The checks only require that no vlef appears between the f25
; label and the br %r14 return.
265  %element = load float, float *%ptr
266  %ret = insertelement <4 x float> %val, float %element, i32 %index
267  ret <4 x float> %ret
268}
269
270; Test v2f64 insertion into the first element.
271define <2 x double> @f26(<2 x double> %val, double *%ptr) {
272; CHECK-LABEL: f26:
273; CHECK: vleg %v24, 0(%r2), 0
274; CHECK: br %r14
; Load one double from %ptr and insert it at constant index 0 of %val.
; The checks expect vleg on 0(%r2) with element 0, the same instruction
; that the <2 x i64> test f16 expects.
275  %element = load double, double *%ptr
276  %ret = insertelement <2 x double> %val, double %element, i32 0
277  ret <2 x double> %ret
278}
279
280; Test v2f64 insertion into the last element.
281define <2 x double> @f27(<2 x double> %val, double *%ptr) {
282; CHECK-LABEL: f27:
283; CHECK: vleg %v24, 0(%r2), 1
284; CHECK: br %r14
; Same shape as f26, but the constant index is 1, the last lane of a
; <2 x double>.  The checks expect vleg on 0(%r2) with element 1.
285  %element = load double, double *%ptr
286  %ret = insertelement <2 x double> %val, double %element, i32 1
287  ret <2 x double> %ret
288}
289
290; Test v2f64 insertion with the highest in-range offset.
291define <2 x double> @f28(<2 x double> %val, double *%base) {
292; CHECK-LABEL: f28:
293; CHECK: vleg %v24, 4088(%r2), 1
294; CHECK: br %r14
; %ptr is %base advanced by 511 double elements; the expected displacement
; of 4088 corresponds to 511 * 8 bytes.  The checks expect that offset to
; appear directly in the vleg address, 4088(%r2).
295  %ptr = getelementptr double, double *%base, i32 511
296  %element = load double, double *%ptr
297  %ret = insertelement <2 x double> %val, double %element, i32 1
298  ret <2 x double> %ret
299}
300
301; Test v2f64 insertion with the first out-of-range offset.
302define <2 x double> @f29(<2 x double> %val, double *%base) {
303; CHECK-LABEL: f29:
304; CHECK: aghi %r2, 4096
305; CHECK: vleg %v24, 0(%r2), 0
306; CHECK: br %r14
; %ptr is %base advanced by 512 double elements, one element past the
; offset used in f28; the expected aghi immediate of 4096 corresponds to
; 512 * 8 bytes.  The vleg is then expected to address 0(%r2).
307  %ptr = getelementptr double, double *%base, i32 512
308  %element = load double, double *%ptr
309  %ret = insertelement <2 x double> %val, double %element, i32 0
310  ret <2 x double> %ret
311}
312
313; Test v2f64 insertion into a variable element.
314define <2 x double> @f30(<2 x double> %val, double *%ptr, i32 %index) {
315; CHECK-LABEL: f30:
316; CHECK-NOT: vleg
317; CHECK: br %r14
; The insertion index is the run-time argument %index rather than a
; constant.  The checks only require that no vleg appears between the f30
; label and the br %r14 return.
318  %element = load double, double *%ptr
319  %ret = insertelement <2 x double> %val, double %element, i32 %index
320  ret <2 x double> %ret
321}
322
323; Test a v4i32 gather of the first element.
324define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) {
325; CHECK-LABEL: f31:
326; CHECK: vgef %v24, 0(%v26,%r2), 0
327; CHECK: br %r14
; The load address is %base plus lane 0 of %index, zero-extended from i32
; to i64.  The loaded i32 is inserted into lane 0 of %val, the same lane
; number the index was extracted from.  The checks expect this whole
; sequence to be emitted as vgef on 0(%v26,%r2) with element 0.
328  %elem = extractelement <4 x i32> %index, i32 0
329  %ext = zext i32 %elem to i64
330  %add = add i64 %base, %ext
331  %ptr = inttoptr i64 %add to i32 *
332  %element = load i32, i32 *%ptr
333  %ret = insertelement <4 x i32> %val, i32 %element, i32 0
334  ret <4 x i32> %ret
335}
336
337; Test a v4i32 gather of the last element.
338define <4 x i32> @f32(<4 x i32> %val, <4 x i32> %index, i64 %base) {
339; CHECK-LABEL: f32:
340; CHECK: vgef %v24, 0(%v26,%r2), 3
341; CHECK: br %r14
; Same gather shape as f31, using lane 3 (the last lane of a <4 x i32>)
; both for the extracted index and for the insertion.  The checks expect
; vgef on 0(%v26,%r2) with element 3.
342  %elem = extractelement <4 x i32> %index, i32 3
343  %ext = zext i32 %elem to i64
344  %add = add i64 %base, %ext
345  %ptr = inttoptr i64 %add to i32 *
346  %element = load i32, i32 *%ptr
347  %ret = insertelement <4 x i32> %val, i32 %element, i32 3
348  ret <4 x i32> %ret
349}
350
351; Test a v4i32 gather with the highest in-range offset.
352define <4 x i32> @f33(<4 x i32> %val, <4 x i32> %index, i64 %base) {
353; CHECK-LABEL: f33:
354; CHECK: vgef %v24, 4095(%v26,%r2), 1
355; CHECK: br %r14
; The load address is %base plus the zero-extended lane 1 of %index, plus
; a further constant 4095 added as a separate i64 add (%add2).  The checks
; expect the constant to end up as the vgef displacement,
; 4095(%v26,%r2), with element 1.
356  %elem = extractelement <4 x i32> %index, i32 1
357  %ext = zext i32 %elem to i64
358  %add1 = add i64 %base, %ext
359  %add2 = add i64 %add1, 4095
360  %ptr = inttoptr i64 %add2 to i32 *
361  %element = load i32, i32 *%ptr
362  %ret = insertelement <4 x i32> %val, i32 %element, i32 1
363  ret <4 x i32> %ret
364}
365
366; Test a v2i64 gather of the first element.
367define <2 x i64> @f34(<2 x i64> %val, <2 x i64> %index, i64 %base) {
368; CHECK-LABEL: f34:
369; CHECK: vgeg %v24, 0(%v26,%r2), 0
370; CHECK: br %r14
; The load address is %base plus lane 0 of %index.  The index lanes are
; already i64 here, so there is no zext step as in the <4 x i32> gathers.
; The loaded i64 is inserted into lane 0 of %val; the checks expect vgeg
; on 0(%v26,%r2) with element 0.
371  %elem = extractelement <2 x i64> %index, i32 0
372  %add = add i64 %base, %elem
373  %ptr = inttoptr i64 %add to i64 *
374  %element = load i64, i64 *%ptr
375  %ret = insertelement <2 x i64> %val, i64 %element, i32 0
376  ret <2 x i64> %ret
377}
378
379; Test a v2i64 gather of the last element.
380define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) {
381; CHECK-LABEL: f35:
382; CHECK: vgeg %v24, 0(%v26,%r2), 1
383; CHECK: br %r14
; Same gather shape as f34, using lane 1 (the last lane of a <2 x i64>)
; both for the extracted index and for the insertion.  The checks expect
; vgeg on 0(%v26,%r2) with element 1.
384  %elem = extractelement <2 x i64> %index, i32 1
385  %add = add i64 %base, %elem
386  %ptr = inttoptr i64 %add to i64 *
387  %element = load i64, i64 *%ptr
388  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
389  ret <2 x i64> %ret
390}
391
392; Test a v4f32 gather of the first element.
393define <4 x float> @f36(<4 x float> %val, <4 x i32> %index, i64 %base) {
394; CHECK-LABEL: f36:
395; CHECK: vgef %v24, 0(%v26,%r2), 0
396; CHECK: br %r14
; The index vector is <4 x i32> while the data vector is <4 x float>.  The
; address is %base plus the zero-extended lane 0 of %index, and the loaded
; float is inserted into lane 0 of %val.  The checks expect vgef on
; 0(%v26,%r2) with element 0, as in the integer test f31.
397  %elem = extractelement <4 x i32> %index, i32 0
398  %ext = zext i32 %elem to i64
399  %add = add i64 %base, %ext
400  %ptr = inttoptr i64 %add to float *
401  %element = load float, float *%ptr
402  %ret = insertelement <4 x float> %val, float %element, i32 0
403  ret <4 x float> %ret
404}
405
406; Test a v4f32 gather of the last element.
407define <4 x float> @f37(<4 x float> %val, <4 x i32> %index, i64 %base) {
408; CHECK-LABEL: f37:
409; CHECK: vgef %v24, 0(%v26,%r2), 3
410; CHECK: br %r14
; Same gather shape as f36, using lane 3 (the last lane) both for the
; extracted i32 index and for the float insertion.  The checks expect
; vgef on 0(%v26,%r2) with element 3.
411  %elem = extractelement <4 x i32> %index, i32 3
412  %ext = zext i32 %elem to i64
413  %add = add i64 %base, %ext
414  %ptr = inttoptr i64 %add to float *
415  %element = load float, float *%ptr
416  %ret = insertelement <4 x float> %val, float %element, i32 3
417  ret <4 x float> %ret
418}
419
420; Test a v2f64 gather of the first element.
421define <2 x double> @f38(<2 x double> %val, <2 x i64> %index, i64 %base) {
422; CHECK-LABEL: f38:
423; CHECK: vgeg %v24, 0(%v26,%r2), 0
424; CHECK: br %r14
; The index vector is <2 x i64> while the data vector is <2 x double>.
; The address is %base plus lane 0 of %index (no extension needed), and
; the loaded double is inserted into lane 0 of %val.  The checks expect
; vgeg on 0(%v26,%r2) with element 0, as in the integer test f34.
425  %elem = extractelement <2 x i64> %index, i32 0
426  %add = add i64 %base, %elem
427  %ptr = inttoptr i64 %add to double *
428  %element = load double, double *%ptr
429  %ret = insertelement <2 x double> %val, double %element, i32 0
430  ret <2 x double> %ret
431}
432
433; Test a v2f64 gather of the last element.
434define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) {
435; CHECK-LABEL: f39:
436; CHECK: vgeg %v24, 0(%v26,%r2), 1
437; CHECK: br %r14
; Same gather shape as f38, using lane 1 (the last lane) both for the
; extracted i64 index and for the double insertion.  The checks expect
; vgeg on 0(%v26,%r2) with element 1.
438  %elem = extractelement <2 x i64> %index, i32 1
439  %add = add i64 %base, %elem
440  %ptr = inttoptr i64 %add to double *
441  %element = load double, double *%ptr
442  %ret = insertelement <2 x double> %val, double %element, i32 1
443  ret <2 x double> %ret
444}
445
446; Test a v4i32 gather where the load is chained.
447define void @f40(<4 x i32> %val, <4 x i32> %index, i64 %base, <4 x i32> *%res) {
448; CHECK-LABEL: f40:
449; CHECK: vgef %v24, 0(%v26,%r2), 1
450; CHECK: vst %v24, 0(%r3)
451; CHECK: br %r14
; Lane-1 <4 x i32> gather (address = %base + zext of lane 1 of %index)
; whose result vector is stored through %res instead of being returned,
; so the function returns void.  The checks expect the vgef first and then
; a vst of %v24 to 0(%r3).
; NOTE(review): "chained" in the test title presumably refers to the load
; being followed by another memory operation (the store) - confirm.
452  %elem = extractelement <4 x i32> %index, i32 1
453  %ext = zext i32 %elem to i64
454  %add = add i64 %base, %ext
455  %ptr = inttoptr i64 %add to i32 *
456  %element = load i32, i32 *%ptr
457  %ret = insertelement <4 x i32> %val, i32 %element, i32 1
458  store <4 x i32> %ret, <4 x i32> *%res
459  ret void
460}
461
462; Test a v2i64 gather where the load is chained.
463define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base, <2 x i64> *%res) {
464; CHECK-LABEL: f41:
465; CHECK: vgeg %v24, 0(%v26,%r2), 1
466; CHECK: vst %v24, 0(%r3)
467; CHECK: br %r14
; Lane-1 <2 x i64> gather (address = %base + lane 1 of %index) whose
; result vector is stored through %res instead of being returned, so the
; function returns void.  The checks expect the vgeg first and then a vst
; of %v24 to 0(%r3).
; NOTE(review): "chained" in the test title presumably refers to the load
; being followed by another memory operation (the store) - confirm.
468  %elem = extractelement <2 x i64> %index, i32 1
469  %add = add i64 %base, %elem
470  %ptr = inttoptr i64 %add to i64 *
471  %element = load i64, i64 *%ptr
472  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
473  store <2 x i64> %ret, <2 x i64> *%res
474  ret void
475}
476
477