; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P9
; RUN: llc -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s \
; RUN:   -check-prefix=P8
; Load the double at %a[3], splat it across both lanes of a <2 x double>,
; and store the vector to %c.
; Both runs expect the element address to be formed with `addi r4, r4, 24`
; and the splat to come from a single lxvdsx; only the store instruction
; differs (stxv on the pwr9 run, stxvd2x on the pwr8 run).
define dso_local void @test(<2 x double>* nocapture %c, double* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds double, double* %a, i64 3
  %0 = load double, double* %arrayidx, align 8
  %splat.splatinsert.i = insertelement <2 x double> undef, double %0, i32 0
  %splat.splat.i = shufflevector <2 x double> %splat.splatinsert.i, <2 x double> undef, <2 x i32> zeroinitializer
  store <2 x double> %splat.splat.i, <2 x double>* %c, align 16
  ret void
}
30
; Load the float at %a[3], splat it across all four lanes of a <4 x float>,
; and store the vector to %c.
; The pwr9 run expects `addi r4, r4, 12` followed by a single lxvwsx and an
; stxv store. The pwr8 run expects the same address adjustment but builds the
; splat from a scalar word load: lfiwzx, xxpermdi, xxspltw, then stvx.
define dso_local void @test2(<4 x float>* nocapture %c, float* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxpermdi vs0, f0, f0, 2
; P8-NEXT:    xxspltw v2, vs0, 3
; P8-NEXT:    stvx v2, 0, r3
; P8-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds float, float* %a, i64 3
  %0 = load float, float* %arrayidx, align 4
  %splat.splatinsert.i = insertelement <4 x float> undef, float %0, i32 0
  %splat.splat.i = shufflevector <4 x float> %splat.splatinsert.i, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %splat.splat.i, <4 x float>* %c, align 16
  ret void
}
55
; Integer counterpart of the <4 x float> splat: load the i32 at %a[3], splat
; it across all four lanes of a <4 x i32>, and store the vector to %c.
; The pwr9 run expects `addi r4, r4, 12`, lxvwsx, stxv. The pwr8 run expects
; the same address adjustment followed by lfiwzx, xxpermdi, xxspltw, stvx.
define dso_local void @test3(<4 x i32>* nocapture %c, i32* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 12
; P9-NEXT:    lxvwsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 12
; P8-NEXT:    lfiwzx f0, 0, r4
; P8-NEXT:    xxpermdi vs0, f0, f0, 2
; P8-NEXT:    xxspltw v2, vs0, 3
; P8-NEXT:    stvx v2, 0, r3
; P8-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 3
  %0 = load i32, i32* %arrayidx, align 4
  %splat.splatinsert.i = insertelement <4 x i32> undef, i32 %0, i32 0
  %splat.splat.i = shufflevector <4 x i32> %splat.splatinsert.i, <4 x i32> undef, <4 x i32> zeroinitializer
  store <4 x i32> %splat.splat.i, <4 x i32>* %c, align 16
  ret void
}
80
; Integer counterpart of the <2 x double> splat: load the i64 at %a[3], splat
; it across both lanes of a <2 x i64>, and store the vector to %c.
; Both runs expect `addi r4, r4, 24` followed by a single lxvdsx; the store is
; stxv on the pwr9 run and stxvd2x on the pwr8 run.
define dso_local void @test4(<2 x i64>* nocapture %c, i64* nocapture readonly %a) local_unnamed_addr {
; P9-LABEL: test4:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r4, r4, 24
; P9-NEXT:    lxvdsx vs0, 0, r4
; P9-NEXT:    stxv vs0, 0(r3)
; P9-NEXT:    blr
;
; P8-LABEL: test4:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r4, r4, 24
; P8-NEXT:    lxvdsx vs0, 0, r4
; P8-NEXT:    stxvd2x vs0, 0, r3
; P8-NEXT:    blr
entry:
  %arrayidx = getelementptr inbounds i64, i64* %a, i64 3
  %0 = load i64, i64* %arrayidx, align 8
  %splat.splatinsert.i = insertelement <2 x i64> undef, i64 %0, i32 0
  %splat.splat.i = shufflevector <2 x i64> %splat.splatinsert.i, <2 x i64> undef, <2 x i32> zeroinitializer
  store <2 x i64> %splat.splat.i, <2 x i64>* %c, align 16
  ret void
}
103
; Reinterpret %s as a <4 x i8> pointer, load those four bytes, and repeat
; bytes 0..3 four times to form the returned <16 x i8>. %t is unused.
; The pwr9 run expects a single lxvwsx straight from r3 with no address
; adjustment. The pwr8 run expects lfiwzx, xxpermdi, xxspltw instead.
define <16 x i8> @unadjusted_lxvwsx(i32* %s, i32* %t) {
; P9-LABEL: unadjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lfiwzx f0, 0, r3
; P8-NEXT:    xxpermdi vs0, f0, f0, 2
; P8-NEXT:    xxspltw v2, vs0, 3
; P8-NEXT:    blr
  entry:
    %0 = bitcast i32* %s to <4 x i8>*
    %1 = load <4 x i8>, <4 x i8>* %0, align 4
    %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    ret <16 x i8> %2
}
122
; Reinterpret %s as a <8 x i8> pointer, load those eight bytes, and repeat
; bytes 4..7 four times to form the returned <16 x i8>. %t is unused.
; The pwr9 run expects the pointer to be advanced by 4 (`addi r3, r3, 4`)
; before a single lxvwsx. The pwr8 run expects a 64-bit scalar load moved
; into a vector register (ld, mtvsrd, xxswapd) followed by xxspltw of word 2.
define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) {
; P9-LABEL: adjusted_lxvwsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    ld r3, 0(r3)
; P8-NEXT:    mtvsrd f0, r3
; P8-NEXT:    xxswapd v2, vs0
; P8-NEXT:    xxspltw v2, v2, 2
; P8-NEXT:    blr
  entry:
    %0 = bitcast i64* %s to <8 x i8>*
    %1 = load <8 x i8>, <8 x i8>* %0, align 8
    %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
    ret <16 x i8> %2
}
143
; Load a full <16 x i8> from %s and repeat its bytes 0..3 four times to form
; the returned vector. %t is unused.
; The pwr9 run expects the load and shuffle to collapse into a single lxvwsx
; from r3 with no address adjustment. The pwr8 run expects a full-vector lvx
; followed by xxspltw of word 3.
define <16 x i8> @unadjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 3
; P8-NEXT:    blr
  entry:
    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
    ret <16 x i8> %1
}
160
; Load a full <16 x i8> from %s and repeat its bytes 4..7 four times to form
; the returned vector. %t is unused.
; The pwr9 run expects the pointer to be advanced by 4 (`addi r3, r3, 4`)
; before a single lxvwsx. The pwr8 run expects a full-vector lvx followed by
; xxspltw of word 2.
define <16 x i8> @adjusted_lxvwsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 4
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 2
; P8-NEXT:    blr
  entry:
    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
    ret <16 x i8> %1
}
178
; Load a full <16 x i8> from %s and repeat its bytes 8..11 four times to form
; the returned vector. %t is unused.
; The pwr9 run expects the pointer to be advanced by 8 (`addi r3, r3, 8`)
; before a single lxvwsx. The pwr8 run expects a full-vector lvx followed by
; xxspltw of word 1.
define <16 x i8> @adjusted_lxvwsx_v16i8_2(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_2:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_2:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 1
; P8-NEXT:    blr
  entry:
    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>
    ret <16 x i8> %1
}
196
; Load a full <16 x i8> from %s and repeat its bytes 12..15 four times to form
; the returned vector. %t is unused.
; The pwr9 run expects the pointer to be advanced by 12 (`addi r3, r3, 12`)
; before a single lxvwsx. The pwr8 run expects a full-vector lvx followed by
; xxspltw of word 0.
define <16 x i8> @adjusted_lxvwsx_v16i8_3(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvwsx_v16i8_3:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 12
; P9-NEXT:    lxvwsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvwsx_v16i8_3:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lvx v2, 0, r3
; P8-NEXT:    xxspltw v2, v2, 0
; P8-NEXT:    blr
  entry:
    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
    ret <16 x i8> %1
}
214
; Reinterpret %s as a <8 x i8> pointer, load those eight bytes, and repeat
; bytes 0..7 twice to form the returned <16 x i8>. %t is unused.
; Both runs expect a single lxvdsx straight from r3 with no address
; adjustment.
define <16 x i8> @unadjusted_lxvdsx(i64* %s, i64* %t) {
; P9-LABEL: unadjusted_lxvdsx:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
  entry:
    %0 = bitcast i64* %s to <8 x i8>*
    %1 = load <8 x i8>, <8 x i8>* %0, align 8
    %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    ret <16 x i8> %2
}
231
; Load a full <16 x i8> from %s and repeat its bytes 0..7 twice to form the
; returned vector. %t is unused.
; Both runs expect the load and shuffle to collapse into a single lxvdsx from
; r3 with no address adjustment.
define <16 x i8> @unadjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: unadjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: unadjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
  entry:
    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    ret <16 x i8> %1
}
247
; Load a full <16 x i8> from %s and repeat its bytes 8..15 twice to form the
; returned vector. %t is unused.
; Both runs expect the pointer to be advanced by 8 (`addi r3, r3, 8`) before
; a single lxvdsx.
define <16 x i8> @adjusted_lxvdsx_v16i8(<16 x i8> *%s, <16 x i8> %t) {
; P9-LABEL: adjusted_lxvdsx_v16i8:
; P9:       # %bb.0: # %entry
; P9-NEXT:    addi r3, r3, 8
; P9-NEXT:    lxvdsx v2, 0, r3
; P9-NEXT:    blr
;
; P8-LABEL: adjusted_lxvdsx_v16i8:
; P8:       # %bb.0: # %entry
; P8-NEXT:    addi r3, r3, 8
; P8-NEXT:    lxvdsx v2, 0, r3
; P8-NEXT:    blr
  entry:
    %0 = load <16 x i8>, <16 x i8>* %s, align 16
    %1 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    ret <16 x i8> %1
}
265