1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
4; RUN:   -check-prefix=P9
5; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
6; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
7; RUN:   -check-prefix=P8
; test: load one double from %a[3], replicate it into both lanes of a
; <2 x double>, and store the vector to %c.
; Expected codegen (see checks below): both the P9 and P8 runs bump the
; pointer by 24 bytes and use lxvdsx; they differ only in the store
; instruction (stxv for P9, stxvd2x for P8).
8define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
9; P9-LABEL: test:
10; P9:       # %bb.0: # %entry
11; P9-NEXT:    addi r4, r4, 24
12; P9-NEXT:    lxvdsx vs0, 0, r4
13; P9-NEXT:    stxv vs0, 0(r3)
14; P9-NEXT:    blr
15;
16; P8-LABEL: test:
17; P8:       # %bb.0: # %entry
18; P8-NEXT:    addi r4, r4, 24
19; P8-NEXT:    lxvdsx vs0, 0, r4
20; P8-NEXT:    stxvd2x vs0, 0, r3
21; P8-NEXT:    blr
22entry:
  ; Address of element 3 of %a (the checks show this as a +24 adjustment).
23  %arrayidx = getelementptr inbounds double, double* %a, i64 3
24  %0 = load double, double* %arrayidx, align 8
  ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask so
  ; every result lane selects lane 0.
25  %splat.splatinsert.i = insertelement <2 x double> undef, double %0, i32 0
26  %splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> undef, <2 x i32> zeroinitializer
27  store <2 x double> %splat.splat.i, <2 x double>* %c, align 16
28  ret void
29}
30
; test2: load one float from %a[3], replicate it into all four lanes of a
; <4 x float>, and store the vector to %c.
; Expected codegen (see checks below): the P9 run uses a single lxvwsx
; after a +12 pointer bump; the P8 run uses lfiwzx followed by xxspltw
; (word index 1) and stores with stvx.
31define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
32; P9-LABEL: test2:
33; P9:       # %bb.0: # %entry
34; P9-NEXT:    addi r4, r4, 12
35; P9-NEXT:    lxvwsx vs0, 0, r4
36; P9-NEXT:    stxv vs0, 0(r3)
37; P9-NEXT:    blr
38;
39; P8-LABEL: test2:
40; P8:       # %bb.0: # %entry
41; P8-NEXT:    addi r4, r4, 12
42; P8-NEXT:    lfiwzx f0, 0, r4
43; P8-NEXT:    xxspltw v2, vs0, 1
44; P8-NEXT:    stvx v2, 0, r3
45; P8-NEXT:    blr
46entry:
  ; Address of element 3 of %a (the checks show this as a +12 adjustment).
47  %arrayidx = getelementptr inbounds float, float* %a, i64 3
48  %0 = load float, float* %arrayidx, align 4
  ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask so
  ; every result lane selects lane 0.
49  %splat.splatinsert.i = insertelement <4 x float> undef, float %0, i32 0
50  %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> undef, <4 x i32> zeroinitializer
51  store <4 x float> %splat.splat.i, <4 x float>* %c, align 16
52  ret void
53}
54
; test3: integer counterpart of the float splat case: load one i32 from
; %a[3], replicate it into all four lanes of a <4 x i32>, and store the
; vector to %c.
; Expected codegen (see checks below): the P9 run uses lxvwsx after a +12
; pointer bump; the P8 run uses lfiwzx + xxspltw (word index 1) + stvx.
55define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
56; P9-LABEL: test3:
57; P9:       # %bb.0: # %entry
58; P9-NEXT:    addi r4, r4, 12
59; P9-NEXT:    lxvwsx vs0, 0, r4
60; P9-NEXT:    stxv vs0, 0(r3)
61; P9-NEXT:    blr
62;
63; P8-LABEL: test3:
64; P8:       # %bb.0: # %entry
65; P8-NEXT:    addi r4, r4, 12
66; P8-NEXT:    lfiwzx f0, 0, r4
67; P8-NEXT:    xxspltw v2, vs0, 1
68; P8-NEXT:    stvx v2, 0, r3
69; P8-NEXT:    blr
70entry:
  ; Address of element 3 of %a (the checks show this as a +12 adjustment).
71  %arrayidx = getelementptr inbounds i32, i32* %a, i64 3
72  %0 = load i32, i32* %arrayidx, align 4
  ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask so
  ; every result lane selects lane 0.
73  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %0, i32 0
74  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
75  store <4 x i32> %splat.splat.i, <4 x i32>* %c, align 16
76  ret void
77}
78
; test4: integer counterpart of the double splat case: load one i64 from
; %a[3], replicate it into both lanes of a <2 x i64>, and store the vector
; to %c.
; Expected codegen (see checks below): both runs bump the pointer by 24
; bytes and use lxvdsx; the store is stxv for P9 and stxvd2x for P8.
79define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
80; P9-LABEL: test4:
81; P9:       # %bb.0: # %entry
82; P9-NEXT:    addi r4, r4, 24
83; P9-NEXT:    lxvdsx vs0, 0, r4
84; P9-NEXT:    stxv vs0, 0(r3)
85; P9-NEXT:    blr
86;
87; P8-LABEL: test4:
88; P8:       # %bb.0: # %entry
89; P8-NEXT:    addi r4, r4, 24
90; P8-NEXT:    lxvdsx vs0, 0, r4
91; P8-NEXT:    stxvd2x vs0, 0, r3
92; P8-NEXT:    blr
93entry:
  ; Address of element 3 of %a (the checks show this as a +24 adjustment).
94  %arrayidx = getelementptr inbounds i64, i64* %a, i64 3
95  %0 = load i64, i64* %arrayidx, align 8
  ; Splat idiom: insert into lane 0, then shuffle with an all-zero mask so
  ; every result lane selects lane 0.
96  %splat.splatinsert.i = insertelement <2 x i64> undef, i64 %0, i32 0
97  %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
98  store <2 x i64> %splat.splat.i, <2 x i64>* %c, align 16
99  ret void
100}
101
; unadjusted_lxvwsx: load 4 bytes from %s as <4 x i8> and replicate that
; 4-byte group four times to form the <16 x i8> result. %t is unused.
; Expected codegen (see checks below): the P9 run loads with lxvwsx
; directly from r3, with no address adjustment; the P8 run uses lfiwzx
; followed by xxspltw (word index 1).
102define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
103; P9-LABEL: unadjusted_lxvwsx:
104; P9:       # %bb.0: # %entry
105; P9-NEXT:    lxvwsx v2, 0, r3
106; P9-NEXT:    blr
107;
108; P8-LABEL: unadjusted_lxvwsx:
109; P8:       # %bb.0: # %entry
110; P8-NEXT:    lfiwzx f0, 0, r3
111; P8-NEXT:    xxspltw v2, vs0, 1
112; P8-NEXT:    blr
113  entry:
114    %0 = bitcast i32* %s to <4 x i8>*
115    %1 = load <4 x i8>, <4 x i8>* %0, align 4
    ; Mask repeats source bytes 0..3 four times (a word splat expressed on bytes).
116    %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
117    ret <16 x i8> %2
118}
119
; adjusted_lxvwsx: load 8 bytes from %s as <8 x i8> and replicate bytes
; 4..7 (the second 4-byte group) four times to form the <16 x i8> result.
; %t is unused.
; Expected codegen (see checks below): the P9 run adds 4 to the pointer and
; then uses lxvwsx; the P8 run loads the full 8 bytes with lfdx and
; splats with xxspltw (word index 0).
120define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
121; P9-LABEL: adjusted_lxvwsx:
122; P9:       # %bb.0: # %entry
123; P9-NEXT:    addi r3, r3, 4
124; P9-NEXT:    lxvwsx v2, 0, r3
125; P9-NEXT:    blr
126;
127; P8-LABEL: adjusted_lxvwsx:
128; P8:       # %bb.0: # %entry
129; P8-NEXT:    lfdx f0, 0, r3
130; P8-NEXT:    xxspltw v2, vs0, 0
131; P8-NEXT:    blr
132  entry:
133    %0 = bitcast i64* %s to <8 x i8>*
134    %1 = load <8 x i8>, <8 x i8>* %0, align 8
    ; Mask repeats source bytes 4..7 four times; the splatted word starts
    ; 4 bytes into the loaded data.
135    %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
136    ret <16 x i8> %2
137}
138
; unadjusted_lxvwsx_v16i8: load a full <16 x i8> from %s and replicate its
; bytes 0..3 four times. %t is unused.
; Expected codegen (see checks below): the P9 run narrows this to a single
; lxvwsx from r3 with no address adjustment; the P8 run loads the whole
; vector with lvx and splats with xxspltw (word index 3).
139define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
140; P9-LABEL: unadjusted_lxvwsx_v16i8:
141; P9:       # %bb.0: # %entry
142; P9-NEXT:    lxvwsx v2, 0, r3
143; P9-NEXT:    blr
144;
145; P8-LABEL: unadjusted_lxvwsx_v16i8:
146; P8:       # %bb.0: # %entry
147; P8-NEXT:    lvx v2, 0, r3
148; P8-NEXT:    xxspltw v2, v2, 3
149; P8-NEXT:    blr
150  entry:
151    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; Mask repeats source bytes 0..3 four times.
152    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
153    ret <16 x i8> %1
154}
155
; adjusted_lxvwsx_v16i8: load a full <16 x i8> from %s and replicate its
; bytes 4..7 four times. %t is unused.
; Expected codegen (see checks below): the P9 run adds 4 to the pointer and
; uses lxvwsx; the P8 run uses lvx followed by xxspltw (word index 2).
156define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
157; P9-LABEL: adjusted_lxvwsx_v16i8:
158; P9:       # %bb.0: # %entry
159; P9-NEXT:    addi r3, r3, 4
160; P9-NEXT:    lxvwsx v2, 0, r3
161; P9-NEXT:    blr
162;
163; P8-LABEL: adjusted_lxvwsx_v16i8:
164; P8:       # %bb.0: # %entry
165; P8-NEXT:    lvx v2, 0, r3
166; P8-NEXT:    xxspltw v2, v2, 2
167; P8-NEXT:    blr
168  entry:
169    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; Mask repeats source bytes 4..7 four times (word at byte offset 4).
170    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
171    ret <16 x i8> %1
172}
173
; adjusted_lxvwsx_v16i8_2: load a full <16 x i8> from %s and replicate its
; bytes 8..11 four times. %t is unused.
; Expected codegen (see checks below): the P9 run adds 8 to the pointer and
; uses lxvwsx; the P8 run uses lvx followed by xxspltw (word index 1).
174define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
175; P9-LABEL: adjusted_lxvwsx_v16i8_2:
176; P9:       # %bb.0: # %entry
177; P9-NEXT:    addi r3, r3, 8
178; P9-NEXT:    lxvwsx v2, 0, r3
179; P9-NEXT:    blr
180;
181; P8-LABEL: adjusted_lxvwsx_v16i8_2:
182; P8:       # %bb.0: # %entry
183; P8-NEXT:    lvx v2, 0, r3
184; P8-NEXT:    xxspltw v2, v2, 1
185; P8-NEXT:    blr
186  entry:
187    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; Mask repeats source bytes 8..11 four times (word at byte offset 8).
188    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
189    ret <16 x i8> %1
190}
191
; adjusted_lxvwsx_v16i8_3: load a full <16 x i8> from %s and replicate its
; bytes 12..15 four times. %t is unused.
; Expected codegen (see checks below): the P9 run adds 12 to the pointer
; and uses lxvwsx; the P8 run uses lvx followed by xxspltw (word index 0).
192define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
193; P9-LABEL: adjusted_lxvwsx_v16i8_3:
194; P9:       # %bb.0: # %entry
195; P9-NEXT:    addi r3, r3, 12
196; P9-NEXT:    lxvwsx v2, 0, r3
197; P9-NEXT:    blr
198;
199; P8-LABEL: adjusted_lxvwsx_v16i8_3:
200; P8:       # %bb.0: # %entry
201; P8-NEXT:    lvx v2, 0, r3
202; P8-NEXT:    xxspltw v2, v2, 0
203; P8-NEXT:    blr
204  entry:
205    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; Mask repeats source bytes 12..15 four times (word at byte offset 12).
206    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
207    ret <16 x i8> %1
208}
209
; unadjusted_lxvdsx: load 8 bytes from %s as <8 x i8> and replicate the
; whole 8-byte group twice to form the <16 x i8> result. %t is unused.
; Expected codegen (see checks below): both the P9 and P8 runs use a single
; lxvdsx from r3 with no address adjustment.
210define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
211; P9-LABEL: unadjusted_lxvdsx:
212; P9:       # %bb.0: # %entry
213; P9-NEXT:    lxvdsx v2, 0, r3
214; P9-NEXT:    blr
215;
216; P8-LABEL: unadjusted_lxvdsx:
217; P8:       # %bb.0: # %entry
218; P8-NEXT:    lxvdsx v2, 0, r3
219; P8-NEXT:    blr
220  entry:
221    %0 = bitcast i64* %s to <8 x i8>*
222    %1 = load <8 x i8>, <8 x i8>* %0, align 8
    ; Mask repeats source bytes 0..7 twice (a doubleword splat expressed on bytes).
223    %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
224    ret <16 x i8> %2
225}
226
; unadjusted_lxvdsx_v16i8: load a full <16 x i8> from %s and replicate its
; bytes 0..7 twice. %t is unused.
; Expected codegen (see checks below): both the P9 and P8 runs narrow this
; to a single lxvdsx from r3 with no address adjustment.
227define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
228; P9-LABEL: unadjusted_lxvdsx_v16i8:
229; P9:       # %bb.0: # %entry
230; P9-NEXT:    lxvdsx v2, 0, r3
231; P9-NEXT:    blr
232;
233; P8-LABEL: unadjusted_lxvdsx_v16i8:
234; P8:       # %bb.0: # %entry
235; P8-NEXT:    lxvdsx v2, 0, r3
236; P8-NEXT:    blr
237  entry:
238    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; Mask repeats source bytes 0..7 twice.
239    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
240    ret <16 x i8> %1
241}
242
; adjusted_lxvdsx_v16i8: load a full <16 x i8> from %s and replicate its
; bytes 8..15 twice. %t is unused.
; Expected codegen (see checks below): both the P9 and P8 runs add 8 to the
; pointer and then use a single lxvdsx.
243define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
244; P9-LABEL: adjusted_lxvdsx_v16i8:
245; P9:       # %bb.0: # %entry
246; P9-NEXT:    addi r3, r3, 8
247; P9-NEXT:    lxvdsx v2, 0, r3
248; P9-NEXT:    blr
249;
250; P8-LABEL: adjusted_lxvdsx_v16i8:
251; P8:       # %bb.0: # %entry
252; P8-NEXT:    addi r3, r3, 8
253; P8-NEXT:    lxvdsx v2, 0, r3
254; P8-NEXT:    blr
255  entry:
256    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    ; Mask repeats source bytes 8..15 twice (doubleword at byte offset 8).
257    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
258    ret <16 x i8> %1
259}
260