1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
4; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
6; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
8; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
9; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
10
; Function Attrs: norecurse nounwind readonly
; Scalar-to-vector insert: loads one i32 directly from %int32 and writes it
; into element 0 of %vec. The checks below pin the expected code for
; pwr9/pwr8 in both little- and big-endian mode.
define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec)  {
; P9LE-LABEL: s2v_test1:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lwz r3, 0(r3)
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test1:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lwz r3, 0(r3)
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test1:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
; P8LE-NEXT:    lxsiwzx v4, 0, r3
; P8LE-NEXT:    addi r4, r4, .LCPI0_0@toc@l
; P8LE-NEXT:    lvx v3, 0, r4
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test1:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %0 = load i32, i32* %int32, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Function Attrs: norecurse nounwind readonly
; Scalar-to-vector insert with a constant offset: loads the i32 at index 1
; from %int32 (byte offset 4) and writes it into element 0 of %vec.
define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec)  {
; P9LE-LABEL: s2v_test2:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lwz r3, 4(r3)
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test2:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lwz r3, 4(r3)
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test2:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
; P8LE-NEXT:    addi r3, r3, 4
; P8LE-NEXT:    addi r4, r4, .LCPI1_0@toc@l
; P8LE-NEXT:    lxsiwzx v4, 0, r3
; P8LE-NEXT:    lvx v3, 0, r4
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test2:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    addi r3, r3, 4
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
  %0 = load i32, i32* %arrayidx, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Function Attrs: norecurse nounwind readonly
; Scalar-to-vector insert with a runtime index: sign-extends %Idx to i64,
; loads the i32 at that index from %int32, and writes it into element 0 of
; %vec. The checks expect the index to be scaled by 4 (sldi ..., 2) and an
; indexed-form load.
define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx)  {
; P9LE-LABEL: s2v_test3:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    sldi r4, r7, 2
; P9LE-NEXT:    lwzx r3, r3, r4
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test3:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    sldi r4, r7, 2
; P9BE-NEXT:    lwzx r3, r3, r4
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test3:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
; P8LE-NEXT:    sldi r5, r7, 2
; P8LE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
; P8LE-NEXT:    lxsiwzx v3, r3, r5
; P8LE-NEXT:    lvx v4, 0, r4
; P8LE-NEXT:    vperm v2, v2, v3, v4
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test3:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    sldi r4, r7, 2
; P8BE-NEXT:    lfiwzx f0, r3, r4
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %idxprom = sext i32 %Idx to i64
  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
  %0 = load i32, i32* %arrayidx, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Function Attrs: norecurse nounwind readonly
; Scalar-to-vector insert with a constant offset: loads the i32 at index 1
; from %int32 (byte offset 4) and writes it into element 0 of %vec.
; NOTE(review): the IR body and expected code are identical to @s2v_test2;
; presumably the original source differed before optimization - confirm
; whether this case still adds coverage.
define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec)  {
; P9LE-LABEL: s2v_test4:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lwz r3, 4(r3)
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test4:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lwz r3, 4(r3)
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test4:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
; P8LE-NEXT:    addi r3, r3, 4
; P8LE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
; P8LE-NEXT:    lxsiwzx v4, 0, r3
; P8LE-NEXT:    lvx v3, 0, r4
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test4:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    addi r3, r3, 4
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
  %0 = load i32, i32* %arrayidx, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Function Attrs: norecurse nounwind readonly
; Scalar-to-vector insert with the arguments swapped: the vector is the
; first parameter and the pointer the second, so the checks expect the load
; address in r5 instead of r3. Loads one i32 from %ptr1 and writes it into
; element 0 of %vec.
define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1)  {
; P9LE-LABEL: s2v_test5:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lwz r3, 0(r5)
; P9LE-NEXT:    mtfprwz f0, r3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test5:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lwz r3, 0(r5)
; P9BE-NEXT:    mtfprwz f0, r3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test5:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
; P8LE-NEXT:    lxsiwzx v4, 0, r5
; P8LE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
; P8LE-NEXT:    lvx v3, 0, r3
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test5:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    lfiwzx f0, 0, r5
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %0 = load i32, i32* %ptr1, align 4
  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
  ret <4 x i32> %vecins
}

; Function Attrs: norecurse nounwind readonly
; Float scalar-to-vector insert: loads one float directly from %f64 and
; writes it into element 0 of the <4 x float> %vec.
; Despite its name, %f64 is a float* (32-bit elements).
define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec)  {
; P9LE-LABEL: s2v_test_f1:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lfs f0, 0(r3)
; P9LE-NEXT:    xscvdpspn vs0, f0
; P9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; P9LE-NEXT:    xxinsertw v2, vs0, 12
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f1:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lfs f0, 0(r3)
; P9BE-NEXT:    xscvdpspn vs0, f0
; P9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
; P9BE-NEXT:    xxinsertw v2, vs0, 0
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f1:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
; P8LE-NEXT:    lxsiwzx v4, 0, r3
; P8LE-NEXT:    addi r4, r4, .LCPI5_0@toc@l
; P8LE-NEXT:    lvx v3, 0, r4
; P8LE-NEXT:    vperm v2, v2, v4, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f1:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi vs0, f0, f0, 1
; P8BE-NEXT:    xxsldwi vs0, v2, vs0, 1
; P8BE-NEXT:    xxsldwi v2, vs0, vs0, 3
; P8BE-NEXT:    blr
entry:
  %0 = load float, float* %f64, align 4
  %vecins = insertelement <4 x float> %vec, float %0, i32 0
  ret <4 x float> %vecins
}

; Function Attrs: norecurse nounwind readonly
; Float scalar-to-vector insert into a <2 x float>: loads the float at
; index 1 from %f64 (byte offset 4) and writes it into element 0 of %vec.
; Despite its name, %f64 is a float* (32-bit elements).
; NOTE(review): the load is marked align 8 although it reads 4 bytes past
; %f64 - confirm the alignment is intended.
define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec)  {
; P9LE-LABEL: s2v_test_f2:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    addi r3, r3, 4
; P9LE-NEXT:    vmrglw v2, v2, v2
; P9LE-NEXT:    lxsiwzx v3, 0, r3
; P9LE-NEXT:    vmrghw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f2:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    addi r3, r3, 4
; P9BE-NEXT:    xxspltw v2, v2, 1
; P9BE-NEXT:    lfiwzx f0, 0, r3
; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
; P9BE-NEXT:    vmrghw v2, v3, v2
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f2:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    vmrglw v2, v2, v2
; P8LE-NEXT:    addi r3, r3, 4
; P8LE-NEXT:    lxsiwzx v3, 0, r3
; P8LE-NEXT:    vmrghw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f2:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    addi r3, r3, 4
; P8BE-NEXT:    xxspltw v2, v2, 1
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
  %0 = load float, float* %arrayidx, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}

; Function Attrs: norecurse nounwind readonly
; Float scalar-to-vector insert into a <2 x float> with a runtime index:
; sign-extends %Idx to i64, loads the float at that index from %f64, and
; writes it into element 0 of %vec.
; Despite its name, %f64 is a float* (32-bit elements).
; NOTE(review): the load is marked align 8 even though the index is only
; known at run time and elements are 4 bytes apart - confirm the alignment
; is intended.
define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx)  {
; P9LE-LABEL: s2v_test_f3:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    sldi r4, r7, 2
; P9LE-NEXT:    vmrglw v2, v2, v2
; P9LE-NEXT:    lxsiwzx v3, r3, r4
; P9LE-NEXT:    vmrghw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f3:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    sldi r4, r7, 2
; P9BE-NEXT:    xxspltw v2, v2, 1
; P9BE-NEXT:    lfiwzx f0, r3, r4
; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
; P9BE-NEXT:    vmrghw v2, v3, v2
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f3:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    vmrglw v2, v2, v2
; P8LE-NEXT:    sldi r4, r7, 2
; P8LE-NEXT:    lxsiwzx v3, r3, r4
; P8LE-NEXT:    vmrghw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f3:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    sldi r4, r7, 2
; P8BE-NEXT:    xxspltw v2, v2, 1
; P8BE-NEXT:    lfiwzx f0, r3, r4
; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
entry:
  %idxprom = sext i32 %Idx to i64
  %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
  %0 = load float, float* %arrayidx, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}

; Function Attrs: norecurse nounwind readonly
; Float scalar-to-vector insert into a <2 x float>: loads the float at
; index 1 from %f64 (byte offset 4) and writes it into element 0 of %vec.
; Despite its name, %f64 is a float* (32-bit elements).
; NOTE(review): the IR body and expected code are identical to
; @s2v_test_f2; presumably the original source differed before
; optimization - confirm whether this case still adds coverage.
; NOTE(review): the load is marked align 8 although it reads 4 bytes past
; %f64 - confirm the alignment is intended.
define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec)  {
; P9LE-LABEL: s2v_test_f4:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    addi r3, r3, 4
; P9LE-NEXT:    vmrglw v2, v2, v2
; P9LE-NEXT:    lxsiwzx v3, 0, r3
; P9LE-NEXT:    vmrghw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f4:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    addi r3, r3, 4
; P9BE-NEXT:    xxspltw v2, v2, 1
; P9BE-NEXT:    lfiwzx f0, 0, r3
; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
; P9BE-NEXT:    vmrghw v2, v3, v2
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f4:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    vmrglw v2, v2, v2
; P8LE-NEXT:    addi r3, r3, 4
; P8LE-NEXT:    lxsiwzx v3, 0, r3
; P8LE-NEXT:    vmrghw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f4:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    addi r3, r3, 4
; P8BE-NEXT:    xxspltw v2, v2, 1
; P8BE-NEXT:    lfiwzx f0, 0, r3
; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
  %0 = load float, float* %arrayidx, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}

; Function Attrs: norecurse nounwind readonly
; Float scalar-to-vector insert into a <2 x float> with the arguments
; swapped: the vector is the first parameter and the pointer the second, so
; the checks expect the load address in r5. Loads one float from %ptr1 and
; writes it into element 0 of %vec.
define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1)  {
; P9LE-LABEL: s2v_test_f5:
; P9LE:       # %bb.0: # %entry
; P9LE-NEXT:    lxsiwzx v3, 0, r5
; P9LE-NEXT:    vmrglw v2, v2, v2
; P9LE-NEXT:    vmrghw v2, v2, v3
; P9LE-NEXT:    blr
;
; P9BE-LABEL: s2v_test_f5:
; P9BE:       # %bb.0: # %entry
; P9BE-NEXT:    lfiwzx f0, 0, r5
; P9BE-NEXT:    xxspltw v2, v2, 1
; P9BE-NEXT:    xxsldwi v3, f0, f0, 1
; P9BE-NEXT:    vmrghw v2, v3, v2
; P9BE-NEXT:    blr
;
; P8LE-LABEL: s2v_test_f5:
; P8LE:       # %bb.0: # %entry
; P8LE-NEXT:    vmrglw v2, v2, v2
; P8LE-NEXT:    lxsiwzx v3, 0, r5
; P8LE-NEXT:    vmrghw v2, v2, v3
; P8LE-NEXT:    blr
;
; P8BE-LABEL: s2v_test_f5:
; P8BE:       # %bb.0: # %entry
; P8BE-NEXT:    lfiwzx f0, 0, r5
; P8BE-NEXT:    xxspltw v2, v2, 1
; P8BE-NEXT:    xxsldwi v3, f0, f0, 1
; P8BE-NEXT:    vmrghw v2, v3, v2
; P8BE-NEXT:    blr
entry:
  %0 = load float, float* %ptr1, align 8
  %vecins = insertelement <2 x float> %vec, float %0, i32 0
  ret <2 x float> %vecins
}
