1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g3 | FileCheck %s --check-prefixes=ALL,G3
3; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- -mcpu=g5 | FileCheck %s --check-prefixes=ALL,G5
4
5; Test that vector splat patterns are scalarized (-mcpu=g3) or lowered to vector splat instructions (-mcpu=g5) correctly.
6
7%f4 = type <4 x float>
8%i4 = type <4 x i32>
9
; splat: build a <4 x float> whose four lanes all hold the scalar %X (via four
; insertelement steps), add it to the vector loaded from %Q, and store the sum
; to %P.
; Expected output for -mcpu=g3: four scalar lfs / fadds / stfs triples.
; Expected output for -mcpu=g5: %X is stored to a stack slot, reloaded with lvx,
; broadcast with vspltw, and combined with a single vaddfp.
10define void @splat(%f4* %P, %f4* %Q, float %X) nounwind {
11; G3-LABEL: splat:
12; G3:       # %bb.0:
13; G3-NEXT:    lfs 0, 0(4)
14; G3-NEXT:    lfs 2, 8(4)
15; G3-NEXT:    lfs 3, 4(4)
16; G3-NEXT:    lfs 4, 12(4)
17; G3-NEXT:    fadds 0, 0, 1
18; G3-NEXT:    fadds 2, 2, 1
19; G3-NEXT:    fadds 3, 3, 1
20; G3-NEXT:    fadds 1, 4, 1
21; G3-NEXT:    stfs 1, 12(3)
22; G3-NEXT:    stfs 2, 8(3)
23; G3-NEXT:    stfs 3, 4(3)
24; G3-NEXT:    stfs 0, 0(3)
25; G3-NEXT:    blr
26;
27; G5-LABEL: splat:
28; G5:       # %bb.0:
29; G5-NEXT:    stwu 1, -32(1)
30; G5-NEXT:    stfs 1, 16(1)
31; G5-NEXT:    addi 5, 1, 16
32; G5-NEXT:    lvx 2, 0, 5
33; G5-NEXT:    lvx 3, 0, 4
34; G5-NEXT:    vspltw 2, 2, 0
35; G5-NEXT:    vaddfp 2, 3, 2
36; G5-NEXT:    stvx 2, 0, 3
37; G5-NEXT:    addi 1, 1, 32
38; G5-NEXT:    blr
39  %tmp = insertelement %f4 undef, float %X, i32 0   ; <%f4> [#uses=1]
40  %tmp2 = insertelement %f4 %tmp, float %X, i32 1   ; <%f4> [#uses=1]
41  %tmp4 = insertelement %f4 %tmp2, float %X, i32 2    ; <%f4> [#uses=1]
42  %tmp6 = insertelement %f4 %tmp4, float %X, i32 3    ; <%f4> [#uses=1]
43  %q = load %f4, %f4* %Q         ; <%f4> [#uses=1]
44  %R = fadd %f4 %q, %tmp6    ; <%f4> [#uses=1]
45  store %f4 %R, %f4* %P
46  ret void
47}
48
; splat_i4: integer counterpart of the float splat test. Builds a <4 x i32>
; with %X in every lane, adds it to the vector loaded from %Q, and stores the
; result to %P.
; Expected output for -mcpu=g3: four scalar lwz / add / stw triples.
; Expected output for -mcpu=g5: %X goes through a stack slot, is broadcast with
; vspltw, and is added with a single vadduwm.
49define void @splat_i4(%i4* %P, %i4* %Q, i32 %X) nounwind {
50; G3-LABEL: splat_i4:
51; G3:       # %bb.0:
52; G3-NEXT:    lwz 6, 0(4)
53; G3-NEXT:    lwz 7, 8(4)
54; G3-NEXT:    lwz 8, 4(4)
55; G3-NEXT:    lwz 4, 12(4)
56; G3-NEXT:    add 6, 6, 5
57; G3-NEXT:    add 8, 8, 5
58; G3-NEXT:    add 7, 7, 5
59; G3-NEXT:    add 4, 4, 5
60; G3-NEXT:    stw 4, 12(3)
61; G3-NEXT:    stw 7, 8(3)
62; G3-NEXT:    stw 8, 4(3)
63; G3-NEXT:    stw 6, 0(3)
64; G3-NEXT:    blr
65;
66; G5-LABEL: splat_i4:
67; G5:       # %bb.0:
68; G5-NEXT:    stwu 1, -32(1)
69; G5-NEXT:    stw 5, 16(1)
70; G5-NEXT:    addi 5, 1, 16
71; G5-NEXT:    lvx 2, 0, 5
72; G5-NEXT:    lvx 3, 0, 4
73; G5-NEXT:    vspltw 2, 2, 0
74; G5-NEXT:    vadduwm 2, 3, 2
75; G5-NEXT:    stvx 2, 0, 3
76; G5-NEXT:    addi 1, 1, 32
77; G5-NEXT:    blr
78  %tmp = insertelement %i4 undef, i32 %X, i32 0     ; <%i4> [#uses=1]
79  %tmp2 = insertelement %i4 %tmp, i32 %X, i32 1     ; <%i4> [#uses=1]
80  %tmp4 = insertelement %i4 %tmp2, i32 %X, i32 2    ; <%i4> [#uses=1]
81  %tmp6 = insertelement %i4 %tmp4, i32 %X, i32 3    ; <%i4> [#uses=1]
82  %q = load %i4, %i4* %Q         ; <%i4> [#uses=1]
83  %R = add %i4 %q, %tmp6    ; <%i4> [#uses=1]
84  store %i4 %R, %i4* %P
85  ret void
86}
87
; splat_imm_i32: add the constant vector <-1, -1, -1, -1> to the <4 x i32>
; loaded from %Q and store the result to %P. The %X parameter is not used by
; the body.
; Expected output for -mcpu=g3: four scalar lwz / addi -1 / stw triples.
; Expected output for -mcpu=g5: the constant is materialized with a single
; vspltisb -1 (no constant-pool load appears) and added with vadduwm.
88define void @splat_imm_i32(%i4* %P, %i4* %Q, i32 %X) nounwind {
89; G3-LABEL: splat_imm_i32:
90; G3:       # %bb.0:
91; G3-NEXT:    lwz 5, 0(4)
92; G3-NEXT:    lwz 6, 8(4)
93; G3-NEXT:    lwz 7, 4(4)
94; G3-NEXT:    lwz 4, 12(4)
95; G3-NEXT:    addi 5, 5, -1
96; G3-NEXT:    addi 7, 7, -1
97; G3-NEXT:    addi 6, 6, -1
98; G3-NEXT:    addi 4, 4, -1
99; G3-NEXT:    stw 4, 12(3)
100; G3-NEXT:    stw 6, 8(3)
101; G3-NEXT:    stw 7, 4(3)
102; G3-NEXT:    stw 5, 0(3)
103; G3-NEXT:    blr
104;
105; G5-LABEL: splat_imm_i32:
106; G5:       # %bb.0:
107; G5-NEXT:    lvx 2, 0, 4
108; G5-NEXT:    vspltisb 3, -1
109; G5-NEXT:    vadduwm 2, 2, 3
110; G5-NEXT:    stvx 2, 0, 3
111; G5-NEXT:    blr
112  %q = load %i4, %i4* %Q         ; <%i4> [#uses=1]
113  %R = add %i4 %q, < i32 -1, i32 -1, i32 -1, i32 -1 >       ; <%i4> [#uses=1]
114  store %i4 %R, %i4* %P
115  ret void
116}
117
; splat_imm_i16: add the constant vector <65537 x 4> to the <4 x i32> loaded
; from %Q and store the result to %P. 65537 is 0x00010001, i.e. each 32-bit lane
; is two 16-bit halves that both equal 1. The %X parameter is not used by the
; body.
; Expected output for -mcpu=g3: per element, addi 1 followed by addis 1
; (together adding 65537), between scalar lwz / stw.
; Expected output for -mcpu=g5: the constant is materialized as a halfword
; splat (vspltish 1) and added with vadduwm.
118define void @splat_imm_i16(%i4* %P, %i4* %Q, i32 %X) nounwind {
119; G3-LABEL: splat_imm_i16:
120; G3:       # %bb.0:
121; G3-NEXT:    lwz 5, 0(4)
122; G3-NEXT:    lwz 6, 8(4)
123; G3-NEXT:    lwz 7, 4(4)
124; G3-NEXT:    lwz 4, 12(4)
125; G3-NEXT:    addi 5, 5, 1
126; G3-NEXT:    addi 7, 7, 1
127; G3-NEXT:    addi 6, 6, 1
128; G3-NEXT:    addi 4, 4, 1
129; G3-NEXT:    addis 5, 5, 1
130; G3-NEXT:    addis 7, 7, 1
131; G3-NEXT:    addis 6, 6, 1
132; G3-NEXT:    addis 4, 4, 1
133; G3-NEXT:    stw 4, 12(3)
134; G3-NEXT:    stw 6, 8(3)
135; G3-NEXT:    stw 7, 4(3)
136; G3-NEXT:    stw 5, 0(3)
137; G3-NEXT:    blr
138;
139; G5-LABEL: splat_imm_i16:
140; G5:       # %bb.0:
141; G5-NEXT:    lvx 2, 0, 4
142; G5-NEXT:    vspltish 3, 1
143; G5-NEXT:    vadduwm 2, 2, 3
144; G5-NEXT:    stvx 2, 0, 3
145; G5-NEXT:    blr
146  %q = load %i4, %i4* %Q         ; <%i4> [#uses=1]
147  %R = add %i4 %q, < i32 65537, i32 65537, i32 65537, i32 65537 >   ; <%i4> [#uses=1]
148  store %i4 %R, %i4* %P
149  ret void
150}
151
; splat_h: build an <8 x i16> with the scalar %tmp in all eight lanes, bitcast
; it to <16 x i8>, and store it to %dst.
; Expected output for -mcpu=g3: eight sth stores of the same register.
; Expected output for -mcpu=g5: %tmp is stored to a stack slot with sth,
; reloaded with lvx, broadcast with vsplth, and stored with one stvx.
152define void @splat_h(i16 %tmp, <16 x i8>* %dst) nounwind {
153; G3-LABEL: splat_h:
154; G3:       # %bb.0:
155; G3-NEXT:    sth 3, 14(4)
156; G3-NEXT:    sth 3, 12(4)
157; G3-NEXT:    sth 3, 10(4)
158; G3-NEXT:    sth 3, 8(4)
159; G3-NEXT:    sth 3, 6(4)
160; G3-NEXT:    sth 3, 4(4)
161; G3-NEXT:    sth 3, 2(4)
162; G3-NEXT:    sth 3, 0(4)
163; G3-NEXT:    blr
164;
165; G5-LABEL: splat_h:
166; G5:       # %bb.0:
167; G5-NEXT:    stwu 1, -32(1)
168; G5-NEXT:    sth 3, 16(1)
169; G5-NEXT:    addi 3, 1, 16
170; G5-NEXT:    lvx 2, 0, 3
171; G5-NEXT:    vsplth 2, 2, 0
172; G5-NEXT:    stvx 2, 0, 4
173; G5-NEXT:    addi 1, 1, 32
174; G5-NEXT:    blr
175  %tmp.upgrd.1 = insertelement <8 x i16> undef, i16 %tmp, i32 0
176  %tmp72 = insertelement <8 x i16> %tmp.upgrd.1, i16 %tmp, i32 1
177  %tmp73 = insertelement <8 x i16> %tmp72, i16 %tmp, i32 2
178  %tmp74 = insertelement <8 x i16> %tmp73, i16 %tmp, i32 3
179  %tmp75 = insertelement <8 x i16> %tmp74, i16 %tmp, i32 4
180  %tmp76 = insertelement <8 x i16> %tmp75, i16 %tmp, i32 5
181  %tmp77 = insertelement <8 x i16> %tmp76, i16 %tmp, i32 6
182  %tmp78 = insertelement <8 x i16> %tmp77, i16 %tmp, i32 7
183  %tmp78.upgrd.2 = bitcast <8 x i16> %tmp78 to <16 x i8>
184  store <16 x i8> %tmp78.upgrd.2, <16 x i8>* %dst
185  ret void
186}
187
; spltish: load a <16 x i8> from %B, subtract a constant from it byte-wise, and
; store the result to %A. The constant is written as an <8 x i16> splat of 15
; bitcast to <16 x i8>.
; Expected output for -mcpu=g3: sixteen lbz / stb pairs; only the bytes at odd
; offsets (1, 3, ..., 15) get an addi -15, the even-offset bytes are copied
; through unchanged. Registers r25-r30 are spilled and reloaded around the body.
; NOTE(review): the odd-byte pattern presumably reflects the big-endian byte
; layout of each i16 15 (high byte 0, low byte 15) -- confirm.
; Expected output for -mcpu=g5: the constant is materialized with vspltish 15
; and subtracted with a single vsububm.
188define void @spltish(<16 x i8>* %A, <16 x i8>* %B) nounwind {
189; G3-LABEL: spltish:
190; G3:       # %bb.0:
191; G3-NEXT:    stwu 1, -48(1)
192; G3-NEXT:    stw 25, 20(1) # 4-byte Folded Spill
193; G3-NEXT:    stw 26, 24(1) # 4-byte Folded Spill
194; G3-NEXT:    stw 27, 28(1) # 4-byte Folded Spill
195; G3-NEXT:    stw 28, 32(1) # 4-byte Folded Spill
196; G3-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
197; G3-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
198; G3-NEXT:    lbz 5, 5(4)
199; G3-NEXT:    lbz 6, 3(4)
200; G3-NEXT:    lbz 7, 1(4)
201; G3-NEXT:    lbz 8, 0(4)
202; G3-NEXT:    lbz 9, 2(4)
203; G3-NEXT:    lbz 10, 4(4)
204; G3-NEXT:    lbz 11, 6(4)
205; G3-NEXT:    lbz 12, 8(4)
206; G3-NEXT:    lbz 0, 10(4)
207; G3-NEXT:    addi 7, 7, -15
208; G3-NEXT:    lbz 30, 12(4)
209; G3-NEXT:    lbz 29, 14(4)
210; G3-NEXT:    lbz 28, 15(4)
211; G3-NEXT:    lbz 27, 13(4)
212; G3-NEXT:    lbz 26, 11(4)
213; G3-NEXT:    lbz 25, 9(4)
214; G3-NEXT:    addi 6, 6, -15
215; G3-NEXT:    lbz 4, 7(4)
216; G3-NEXT:    addi 5, 5, -15
217; G3-NEXT:    addi 25, 25, -15
218; G3-NEXT:    addi 26, 26, -15
219; G3-NEXT:    addi 4, 4, -15
220; G3-NEXT:    addi 27, 27, -15
221; G3-NEXT:    addi 28, 28, -15
222; G3-NEXT:    stb 29, 14(3)
223; G3-NEXT:    stb 30, 12(3)
224; G3-NEXT:    stb 0, 10(3)
225; G3-NEXT:    stb 12, 8(3)
226; G3-NEXT:    stb 11, 6(3)
227; G3-NEXT:    stb 10, 4(3)
228; G3-NEXT:    stb 9, 2(3)
229; G3-NEXT:    stb 8, 0(3)
230; G3-NEXT:    stb 28, 15(3)
231; G3-NEXT:    stb 27, 13(3)
232; G3-NEXT:    stb 26, 11(3)
233; G3-NEXT:    stb 25, 9(3)
234; G3-NEXT:    stb 4, 7(3)
235; G3-NEXT:    stb 5, 5(3)
236; G3-NEXT:    stb 6, 3(3)
237; G3-NEXT:    stb 7, 1(3)
238; G3-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
239; G3-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
240; G3-NEXT:    lwz 28, 32(1) # 4-byte Folded Reload
241; G3-NEXT:    lwz 27, 28(1) # 4-byte Folded Reload
242; G3-NEXT:    lwz 26, 24(1) # 4-byte Folded Reload
243; G3-NEXT:    lwz 25, 20(1) # 4-byte Folded Reload
244; G3-NEXT:    addi 1, 1, 48
245; G3-NEXT:    blr
246;
247; G5-LABEL: spltish:
248; G5:       # %bb.0:
249; G5-NEXT:    lvx 2, 0, 4
250; G5-NEXT:    vspltish 3, 15
251; G5-NEXT:    vsububm 2, 2, 3
252; G5-NEXT:    stvx 2, 0, 3
253; G5-NEXT:    blr
254  %tmp = load <16 x i8>, <16 x i8>* %B         ; <<16 x i8>> [#uses=1]
; The bitcasts named %tmp.s and %tmp4.u convert <16 x i8> to the same type, so
; they do not change the value.
255  %tmp.s = bitcast <16 x i8> %tmp to <16 x i8>      ; <<16 x i8>> [#uses=1]
256  %tmp4 = sub <16 x i8> %tmp.s, bitcast (<8 x i16> < i16 15, i16 15, i16 15, i16 15, i16 15, i16
257 15, i16 15, i16 15 > to <16 x i8>)       ; <<16 x i8>> [#uses=1]
258  %tmp4.u = bitcast <16 x i8> %tmp4 to <16 x i8>    ; <<16 x i8>> [#uses=1]
259  store <16 x i8> %tmp4.u, <16 x i8>* %A
260  ret void
261}
262