1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4; RUN: FileCheck %s --check-prefix=CHECK-P8
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
6; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
7; RUN: FileCheck %s --check-prefix=CHECK-P9
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
9; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
10; RUN: FileCheck %s --check-prefix=CHECK-BE
11
; test2elt: unsigned conversion of two packed i8 lanes to float.
; The i16 argument is reinterpreted as <2 x i8>, each lane is converted with
; uitofp, and the resulting <2 x float> is returned bitcast to i64.
; The assertion blocks below are autogenerated (see the NOTE on the first line
; of the file); regenerate them with the update script instead of hand-editing.
; There is one block per FileCheck prefix used by the three llc invocations at
; the top of the file (pwr8 little-endian, pwr9 little-endian, pwr9 big-endian).
12define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 {
13; CHECK-P8-LABEL: test2elt:
14; CHECK-P8:       # %bb.0: # %entry
15; CHECK-P8-NEXT:    mtfprd f0, r3
16; CHECK-P8-NEXT:    mffprd r3, f0
17; CHECK-P8-NEXT:    clrldi r4, r3, 56
18; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
19; CHECK-P8-NEXT:    clrlwi r4, r4, 24
20; CHECK-P8-NEXT:    clrlwi r3, r3, 24
21; CHECK-P8-NEXT:    mtfprwz f0, r4
22; CHECK-P8-NEXT:    mtfprwz f1, r3
23; CHECK-P8-NEXT:    xscvuxdsp f0, f0
24; CHECK-P8-NEXT:    xscvuxdsp f1, f1
25; CHECK-P8-NEXT:    xscvdpspn vs0, f0
26; CHECK-P8-NEXT:    xscvdpspn vs1, f1
27; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
28; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
29; CHECK-P8-NEXT:    vmrghw v2, v3, v2
30; CHECK-P8-NEXT:    xxswapd vs0, v2
31; CHECK-P8-NEXT:    mffprd r3, f0
32; CHECK-P8-NEXT:    blr
33;
34; CHECK-P9-LABEL: test2elt:
35; CHECK-P9:       # %bb.0: # %entry
36; CHECK-P9-NEXT:    mtvsrws v2, r3
37; CHECK-P9-NEXT:    li r3, 0
38; CHECK-P9-NEXT:    vextubrx r3, r3, v2
39; CHECK-P9-NEXT:    clrlwi r3, r3, 24
40; CHECK-P9-NEXT:    mtfprwz f0, r3
41; CHECK-P9-NEXT:    li r3, 1
42; CHECK-P9-NEXT:    xscvuxdsp f0, f0
43; CHECK-P9-NEXT:    vextubrx r3, r3, v2
44; CHECK-P9-NEXT:    clrlwi r3, r3, 24
45; CHECK-P9-NEXT:    xscvdpspn vs0, f0
46; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
47; CHECK-P9-NEXT:    mtfprwz f0, r3
48; CHECK-P9-NEXT:    xscvuxdsp f0, f0
49; CHECK-P9-NEXT:    xscvdpspn vs0, f0
50; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
51; CHECK-P9-NEXT:    vmrghw v2, v2, v3
52; CHECK-P9-NEXT:    mfvsrld r3, v2
53; CHECK-P9-NEXT:    blr
54;
55; CHECK-BE-LABEL: test2elt:
56; CHECK-BE:       # %bb.0: # %entry
57; CHECK-BE-NEXT:    mtvsrws v2, r3
58; CHECK-BE-NEXT:    li r3, 1
59; CHECK-BE-NEXT:    vextublx r3, r3, v2
60; CHECK-BE-NEXT:    clrlwi r3, r3, 24
61; CHECK-BE-NEXT:    mtfprwz f0, r3
62; CHECK-BE-NEXT:    li r3, 0
63; CHECK-BE-NEXT:    xscvuxdsp f0, f0
64; CHECK-BE-NEXT:    vextublx r3, r3, v2
65; CHECK-BE-NEXT:    clrlwi r3, r3, 24
66; CHECK-BE-NEXT:    xscvdpspn v3, f0
67; CHECK-BE-NEXT:    mtfprwz f0, r3
68; CHECK-BE-NEXT:    xscvuxdsp f0, f0
69; CHECK-BE-NEXT:    xscvdpspn v2, f0
70; CHECK-BE-NEXT:    vmrghw v2, v2, v3
71; CHECK-BE-NEXT:    mfvsrd r3, v2
72; CHECK-BE-NEXT:    blr
; IR under test: i16 -> <2 x i8> -> (uitofp) <2 x float> -> i64.
73entry:
74  %0 = bitcast i16 %a.coerce to <2 x i8>
75  %1 = uitofp <2 x i8> %0 to <2 x float>
76  %2 = bitcast <2 x float> %1 to i64
77  ret i64 %2
78}
79
; test4elt: unsigned conversion of four packed i8 lanes to float.
; The i32 argument is reinterpreted as <4 x i8>, converted with uitofp, and the
; <4 x float> result is returned directly.
; The assertion blocks are autogenerated, one per FileCheck prefix; all three
; expect a vperm against a zeroed register (xxlxor) followed by xvcvuxwsp.
80define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 {
81; CHECK-P8-LABEL: test4elt:
82; CHECK-P8:       # %bb.0: # %entry
83; CHECK-P8-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
84; CHECK-P8-NEXT:    mtvsrwz v2, r3
85; CHECK-P8-NEXT:    addi r4, r4, .LCPI1_0@toc@l
86; CHECK-P8-NEXT:    xxlxor v4, v4, v4
87; CHECK-P8-NEXT:    lvx v3, 0, r4
88; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
89; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
90; CHECK-P8-NEXT:    blr
91;
92; CHECK-P9-LABEL: test4elt:
93; CHECK-P9:       # %bb.0: # %entry
94; CHECK-P9-NEXT:    mtvsrws v2, r3
95; CHECK-P9-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
96; CHECK-P9-NEXT:    xxlxor v4, v4, v4
97; CHECK-P9-NEXT:    addi r3, r3, .LCPI1_0@toc@l
98; CHECK-P9-NEXT:    lxvx v3, 0, r3
99; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
100; CHECK-P9-NEXT:    xvcvuxwsp v2, v2
101; CHECK-P9-NEXT:    blr
102;
103; CHECK-BE-LABEL: test4elt:
104; CHECK-BE:       # %bb.0: # %entry
105; CHECK-BE-NEXT:    mtvsrws v2, r3
106; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
107; CHECK-BE-NEXT:    xxlxor v4, v4, v4
108; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
109; CHECK-BE-NEXT:    lxvx v3, 0, r3
110; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
111; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
112; CHECK-BE-NEXT:    blr
; IR under test: i32 -> <4 x i8> -> (uitofp) <4 x float>.
113entry:
114  %0 = bitcast i32 %a.coerce to <4 x i8>
115  %1 = uitofp <4 x i8> %0 to <4 x float>
116  ret <4 x float> %1
117}
118
; test8elt: unsigned conversion of eight packed i8 lanes to float.
; The i64 argument is reinterpreted as <8 x i8>, converted with uitofp, and the
; <8 x float> result is stored (align 32) through the sret pointer %agg.result.
; The assertion blocks are autogenerated, one per FileCheck prefix; each expects
; two vperm/xvcvuxwsp pairs and two 16-byte stores (offsets 0 and 16).
119define void @test8elt(<8 x float>* noalias nocapture sret(<8 x float>) %agg.result, i64 %a.coerce) local_unnamed_addr #2 {
120; CHECK-P8-LABEL: test8elt:
121; CHECK-P8:       # %bb.0: # %entry
122; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
123; CHECK-P8-NEXT:    addis r6, r2, .LCPI2_1@toc@ha
124; CHECK-P8-NEXT:    mtvsrd v2, r4
125; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
126; CHECK-P8-NEXT:    addi r4, r6, .LCPI2_1@toc@l
127; CHECK-P8-NEXT:    xxlxor v4, v4, v4
128; CHECK-P8-NEXT:    lvx v3, 0, r5
129; CHECK-P8-NEXT:    lvx v5, 0, r4
130; CHECK-P8-NEXT:    li r4, 16
131; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
132; CHECK-P8-NEXT:    vperm v2, v4, v2, v5
133; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
134; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
135; CHECK-P8-NEXT:    stvx v3, 0, r3
136; CHECK-P8-NEXT:    stvx v2, r3, r4
137; CHECK-P8-NEXT:    blr
138;
139; CHECK-P9-LABEL: test8elt:
140; CHECK-P9:       # %bb.0: # %entry
141; CHECK-P9-NEXT:    mtvsrd v2, r4
142; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
143; CHECK-P9-NEXT:    xxlxor v4, v4, v4
144; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0@toc@l
145; CHECK-P9-NEXT:    lxvx v3, 0, r4
146; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
147; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_1@toc@l
148; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
149; CHECK-P9-NEXT:    xvcvuxwsp vs0, v3
150; CHECK-P9-NEXT:    lxvx v3, 0, r4
151; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
152; CHECK-P9-NEXT:    stxv vs0, 0(r3)
153; CHECK-P9-NEXT:    xvcvuxwsp vs1, v2
154; CHECK-P9-NEXT:    stxv vs1, 16(r3)
155; CHECK-P9-NEXT:    blr
156;
157; CHECK-BE-LABEL: test8elt:
158; CHECK-BE:       # %bb.0: # %entry
159; CHECK-BE-NEXT:    mtvsrd v2, r4
160; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
161; CHECK-BE-NEXT:    xxlxor v4, v4, v4
162; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
163; CHECK-BE-NEXT:    lxvx v3, 0, r4
164; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_1@toc@ha
165; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_1@toc@l
166; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
167; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
168; CHECK-BE-NEXT:    lxvx v3, 0, r4
169; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
170; CHECK-BE-NEXT:    stxv vs0, 0(r3)
171; CHECK-BE-NEXT:    xvcvuxwsp vs1, v2
172; CHECK-BE-NEXT:    stxv vs1, 16(r3)
173; CHECK-BE-NEXT:    blr
; IR under test: i64 -> <8 x i8> -> (uitofp) <8 x float>, stored to %agg.result.
174entry:
175  %0 = bitcast i64 %a.coerce to <8 x i8>
176  %1 = uitofp <8 x i8> %0 to <8 x float>
177  store <8 x float> %1, <8 x float>* %agg.result, align 32
178  ret void
179}
180
; test16elt: unsigned conversion of a full <16 x i8> vector to float.
; Unlike the smaller variants the argument is already a vector (no bitcast from
; a scalar); it is converted with uitofp and the <16 x float> result is stored
; (align 64) through the sret pointer %agg.result.
; The assertion blocks are autogenerated, one per FileCheck prefix; each expects
; four vperm/xvcvuxwsp pairs and four 16-byte stores (offsets 0, 16, 32, 48).
181define void @test16elt(<16 x float>* noalias nocapture sret(<16 x float>) %agg.result, <16 x i8> %a) local_unnamed_addr #3 {
182; CHECK-P8-LABEL: test16elt:
183; CHECK-P8:       # %bb.0: # %entry
184; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
185; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_2@toc@ha
186; CHECK-P8-NEXT:    xxlxor v4, v4, v4
187; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_0@toc@l
188; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_2@toc@l
189; CHECK-P8-NEXT:    lvx v3, 0, r4
190; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
191; CHECK-P8-NEXT:    lvx v5, 0, r5
192; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_1@toc@ha
193; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3@toc@l
194; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_1@toc@l
195; CHECK-P8-NEXT:    lvx v0, 0, r4
196; CHECK-P8-NEXT:    lvx v1, 0, r5
197; CHECK-P8-NEXT:    li r4, 48
198; CHECK-P8-NEXT:    li r5, 32
199; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
200; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
201; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
202; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
203; CHECK-P8-NEXT:    xvcvuxwsp v4, v5
204; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
205; CHECK-P8-NEXT:    xvcvuxwsp v5, v0
206; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
207; CHECK-P8-NEXT:    stvx v4, r3, r5
208; CHECK-P8-NEXT:    stvx v3, 0, r3
209; CHECK-P8-NEXT:    stvx v5, r3, r4
210; CHECK-P8-NEXT:    li r4, 16
211; CHECK-P8-NEXT:    stvx v2, r3, r4
212; CHECK-P8-NEXT:    blr
213;
214; CHECK-P9-LABEL: test16elt:
215; CHECK-P9:       # %bb.0: # %entry
216; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
217; CHECK-P9-NEXT:    xxlxor v4, v4, v4
218; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0@toc@l
219; CHECK-P9-NEXT:    lxvx v3, 0, r4
220; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
221; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1@toc@l
222; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
223; CHECK-P9-NEXT:    xvcvuxwsp vs0, v3
224; CHECK-P9-NEXT:    lxvx v3, 0, r4
225; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
226; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_2@toc@l
227; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
228; CHECK-P9-NEXT:    stxv vs0, 0(r3)
229; CHECK-P9-NEXT:    xvcvuxwsp vs1, v3
230; CHECK-P9-NEXT:    lxvx v3, 0, r4
231; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
232; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_3@toc@l
233; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
234; CHECK-P9-NEXT:    stxv vs1, 16(r3)
235; CHECK-P9-NEXT:    xvcvuxwsp vs2, v3
236; CHECK-P9-NEXT:    lxvx v3, 0, r4
237; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
238; CHECK-P9-NEXT:    stxv vs2, 32(r3)
239; CHECK-P9-NEXT:    xvcvuxwsp vs3, v2
240; CHECK-P9-NEXT:    stxv vs3, 48(r3)
241; CHECK-P9-NEXT:    blr
242;
243; CHECK-BE-LABEL: test16elt:
244; CHECK-BE:       # %bb.0: # %entry
245; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
246; CHECK-BE-NEXT:    xxlxor v4, v4, v4
247; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
248; CHECK-BE-NEXT:    lxvx v3, 0, r4
249; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
250; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1@toc@l
251; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
252; CHECK-BE-NEXT:    xvcvuxwsp vs0, v3
253; CHECK-BE-NEXT:    lxvx v3, 0, r4
254; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
255; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_2@toc@l
256; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
257; CHECK-BE-NEXT:    stxv vs0, 0(r3)
258; CHECK-BE-NEXT:    xvcvuxwsp vs1, v3
259; CHECK-BE-NEXT:    lxvx v3, 0, r4
260; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
261; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_3@toc@l
262; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
263; CHECK-BE-NEXT:    stxv vs1, 16(r3)
264; CHECK-BE-NEXT:    xvcvuxwsp vs2, v3
265; CHECK-BE-NEXT:    lxvx v3, 0, r4
266; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
267; CHECK-BE-NEXT:    stxv vs2, 32(r3)
268; CHECK-BE-NEXT:    xvcvuxwsp vs3, v2
269; CHECK-BE-NEXT:    stxv vs3, 48(r3)
270; CHECK-BE-NEXT:    blr
; IR under test: <16 x i8> -> (uitofp) <16 x float>, stored to %agg.result.
271entry:
272  %0 = uitofp <16 x i8> %a to <16 x float>
273  store <16 x float> %0, <16 x float>* %agg.result, align 64
274  ret void
275}
276
; test2elt_signed: signed counterpart of the two-lane conversion.
; The i16 argument is reinterpreted as <2 x i8>, each lane is converted with
; sitofp, and the resulting <2 x float> is returned bitcast to i64.
; The assertion blocks are autogenerated, one per FileCheck prefix; the expected
; sequences sign-extend each byte (extsb) and convert with xscvsxdsp.
277define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 {
278; CHECK-P8-LABEL: test2elt_signed:
279; CHECK-P8:       # %bb.0: # %entry
280; CHECK-P8-NEXT:    mtfprd f0, r3
281; CHECK-P8-NEXT:    mffprd r3, f0
282; CHECK-P8-NEXT:    clrldi r4, r3, 56
283; CHECK-P8-NEXT:    rldicl r3, r3, 56, 56
284; CHECK-P8-NEXT:    extsb r4, r4
285; CHECK-P8-NEXT:    extsb r3, r3
286; CHECK-P8-NEXT:    mtfprwa f0, r4
287; CHECK-P8-NEXT:    mtfprwa f1, r3
288; CHECK-P8-NEXT:    xscvsxdsp f0, f0
289; CHECK-P8-NEXT:    xscvsxdsp f1, f1
290; CHECK-P8-NEXT:    xscvdpspn vs0, f0
291; CHECK-P8-NEXT:    xscvdpspn vs1, f1
292; CHECK-P8-NEXT:    xxsldwi v2, vs0, vs0, 3
293; CHECK-P8-NEXT:    xxsldwi v3, vs1, vs1, 3
294; CHECK-P8-NEXT:    vmrghw v2, v3, v2
295; CHECK-P8-NEXT:    xxswapd vs0, v2
296; CHECK-P8-NEXT:    mffprd r3, f0
297; CHECK-P8-NEXT:    blr
298;
299; CHECK-P9-LABEL: test2elt_signed:
300; CHECK-P9:       # %bb.0: # %entry
301; CHECK-P9-NEXT:    mtvsrws v2, r3
302; CHECK-P9-NEXT:    li r3, 0
303; CHECK-P9-NEXT:    vextubrx r3, r3, v2
304; CHECK-P9-NEXT:    extsb r3, r3
305; CHECK-P9-NEXT:    mtfprwa f0, r3
306; CHECK-P9-NEXT:    li r3, 1
307; CHECK-P9-NEXT:    xscvsxdsp f0, f0
308; CHECK-P9-NEXT:    vextubrx r3, r3, v2
309; CHECK-P9-NEXT:    extsb r3, r3
310; CHECK-P9-NEXT:    xscvdpspn vs0, f0
311; CHECK-P9-NEXT:    xxsldwi v3, vs0, vs0, 3
312; CHECK-P9-NEXT:    mtfprwa f0, r3
313; CHECK-P9-NEXT:    xscvsxdsp f0, f0
314; CHECK-P9-NEXT:    xscvdpspn vs0, f0
315; CHECK-P9-NEXT:    xxsldwi v2, vs0, vs0, 3
316; CHECK-P9-NEXT:    vmrghw v2, v2, v3
317; CHECK-P9-NEXT:    mfvsrld r3, v2
318; CHECK-P9-NEXT:    blr
319;
320; CHECK-BE-LABEL: test2elt_signed:
321; CHECK-BE:       # %bb.0: # %entry
322; CHECK-BE-NEXT:    mtvsrws v2, r3
323; CHECK-BE-NEXT:    li r3, 1
324; CHECK-BE-NEXT:    vextublx r3, r3, v2
325; CHECK-BE-NEXT:    extsb r3, r3
326; CHECK-BE-NEXT:    mtfprwa f0, r3
327; CHECK-BE-NEXT:    li r3, 0
328; CHECK-BE-NEXT:    xscvsxdsp f0, f0
329; CHECK-BE-NEXT:    vextublx r3, r3, v2
330; CHECK-BE-NEXT:    extsb r3, r3
331; CHECK-BE-NEXT:    xscvdpspn v3, f0
332; CHECK-BE-NEXT:    mtfprwa f0, r3
333; CHECK-BE-NEXT:    xscvsxdsp f0, f0
334; CHECK-BE-NEXT:    xscvdpspn v2, f0
335; CHECK-BE-NEXT:    vmrghw v2, v2, v3
336; CHECK-BE-NEXT:    mfvsrd r3, v2
337; CHECK-BE-NEXT:    blr
; IR under test: i16 -> <2 x i8> -> (sitofp) <2 x float> -> i64.
338entry:
339  %0 = bitcast i16 %a.coerce to <2 x i8>
340  %1 = sitofp <2 x i8> %0 to <2 x float>
341  %2 = bitcast <2 x float> %1 to i64
342  ret i64 %2
343}
344
; test4elt_signed: signed counterpart of the four-lane conversion.
; The i32 argument is reinterpreted as <4 x i8>, converted with sitofp, and the
; <4 x float> result is returned directly.
; The assertion blocks are autogenerated, one per FileCheck prefix. The pwr8
; block expects a vslw/vsraw pair before xvcvsxwsp, while both pwr9 blocks
; expect vextsb2w instead.
345define <4 x float> @test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 {
346; CHECK-P8-LABEL: test4elt_signed:
347; CHECK-P8:       # %bb.0: # %entry
348; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
349; CHECK-P8-NEXT:    mtvsrwz v3, r3
350; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_0@toc@l
351; CHECK-P8-NEXT:    lvx v2, 0, r4
352; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
353; CHECK-P8-NEXT:    vspltisw v3, 12
354; CHECK-P8-NEXT:    vadduwm v3, v3, v3
355; CHECK-P8-NEXT:    vslw v2, v2, v3
356; CHECK-P8-NEXT:    vsraw v2, v2, v3
357; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
358; CHECK-P8-NEXT:    blr
359;
360; CHECK-P9-LABEL: test4elt_signed:
361; CHECK-P9:       # %bb.0: # %entry
362; CHECK-P9-NEXT:    mtvsrws v2, r3
363; CHECK-P9-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
364; CHECK-P9-NEXT:    addi r3, r3, .LCPI5_0@toc@l
365; CHECK-P9-NEXT:    lxvx v3, 0, r3
366; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
367; CHECK-P9-NEXT:    vextsb2w v2, v2
368; CHECK-P9-NEXT:    xvcvsxwsp v2, v2
369; CHECK-P9-NEXT:    blr
370;
371; CHECK-BE-LABEL: test4elt_signed:
372; CHECK-BE:       # %bb.0: # %entry
373; CHECK-BE-NEXT:    mtvsrws v2, r3
374; CHECK-BE-NEXT:    addis r3, r2, .LCPI5_0@toc@ha
375; CHECK-BE-NEXT:    addi r3, r3, .LCPI5_0@toc@l
376; CHECK-BE-NEXT:    lxvx v3, 0, r3
377; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
378; CHECK-BE-NEXT:    vextsb2w v2, v2
379; CHECK-BE-NEXT:    xvcvsxwsp v2, v2
380; CHECK-BE-NEXT:    blr
; IR under test: i32 -> <4 x i8> -> (sitofp) <4 x float>.
381entry:
382  %0 = bitcast i32 %a.coerce to <4 x i8>
383  %1 = sitofp <4 x i8> %0 to <4 x float>
384  ret <4 x float> %1
385}
386
; test8elt_signed: signed counterpart of the eight-lane conversion.
; The i64 argument is reinterpreted as <8 x i8>, converted with sitofp, and the
; <8 x float> result is stored (align 32) through the sret pointer %agg.result.
; The assertion blocks are autogenerated, one per FileCheck prefix.
; NOTE(review): the big-endian block differs in shape from the pwr9
; little-endian block (an extra xxlxor, a vperm with a zeroed operand, and the
; 16/0 store offsets in the opposite order) -- confirm against a fresh run of
; the update script before relying on it.
387define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %agg.result, i64 %a.coerce) local_unnamed_addr #2 {
388; CHECK-P8-LABEL: test8elt_signed:
389; CHECK-P8:       # %bb.0: # %entry
390; CHECK-P8-NEXT:    addis r5, r2, .LCPI6_0@toc@ha
391; CHECK-P8-NEXT:    addis r6, r2, .LCPI6_1@toc@ha
392; CHECK-P8-NEXT:    mtvsrd v3, r4
393; CHECK-P8-NEXT:    vspltisw v5, 12
394; CHECK-P8-NEXT:    li r4, 16
395; CHECK-P8-NEXT:    addi r5, r5, .LCPI6_0@toc@l
396; CHECK-P8-NEXT:    lvx v2, 0, r5
397; CHECK-P8-NEXT:    addi r5, r6, .LCPI6_1@toc@l
398; CHECK-P8-NEXT:    lvx v4, 0, r5
399; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
400; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
401; CHECK-P8-NEXT:    vadduwm v4, v5, v5
402; CHECK-P8-NEXT:    vslw v2, v2, v4
403; CHECK-P8-NEXT:    vslw v3, v3, v4
404; CHECK-P8-NEXT:    vsraw v2, v2, v4
405; CHECK-P8-NEXT:    vsraw v3, v3, v4
406; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
407; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
408; CHECK-P8-NEXT:    stvx v2, 0, r3
409; CHECK-P8-NEXT:    stvx v3, r3, r4
410; CHECK-P8-NEXT:    blr
411;
412; CHECK-P9-LABEL: test8elt_signed:
413; CHECK-P9:       # %bb.0: # %entry
414; CHECK-P9-NEXT:    mtvsrd v2, r4
415; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
416; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_0@toc@l
417; CHECK-P9-NEXT:    lxvx v3, 0, r4
418; CHECK-P9-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
419; CHECK-P9-NEXT:    addi r4, r4, .LCPI6_1@toc@l
420; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
421; CHECK-P9-NEXT:    vextsb2w v3, v3
422; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
423; CHECK-P9-NEXT:    lxvx v3, 0, r4
424; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
425; CHECK-P9-NEXT:    stxv vs0, 0(r3)
426; CHECK-P9-NEXT:    vextsb2w v2, v2
427; CHECK-P9-NEXT:    xvcvsxwsp vs1, v2
428; CHECK-P9-NEXT:    stxv vs1, 16(r3)
429; CHECK-P9-NEXT:    blr
430;
431; CHECK-BE-LABEL: test8elt_signed:
432; CHECK-BE:       # %bb.0: # %entry
433; CHECK-BE-NEXT:    mtvsrd v2, r4
434; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_0@toc@ha
435; CHECK-BE-NEXT:    xxlxor v3, v3, v3
436; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_0@toc@l
437; CHECK-BE-NEXT:    lxvx v4, 0, r4
438; CHECK-BE-NEXT:    addis r4, r2, .LCPI6_1@toc@ha
439; CHECK-BE-NEXT:    addi r4, r4, .LCPI6_1@toc@l
440; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
441; CHECK-BE-NEXT:    vextsb2w v3, v3
442; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
443; CHECK-BE-NEXT:    lxvx v3, 0, r4
444; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
445; CHECK-BE-NEXT:    stxv vs0, 16(r3)
446; CHECK-BE-NEXT:    vextsb2w v2, v2
447; CHECK-BE-NEXT:    xvcvsxwsp vs1, v2
448; CHECK-BE-NEXT:    stxv vs1, 0(r3)
449; CHECK-BE-NEXT:    blr
; IR under test: i64 -> <8 x i8> -> (sitofp) <8 x float>, stored to %agg.result.
450entry:
451  %0 = bitcast i64 %a.coerce to <8 x i8>
452  %1 = sitofp <8 x i8> %0 to <8 x float>
453  store <8 x float> %1, <8 x float>* %agg.result, align 32
454  ret void
455}
456
; test16elt_signed: signed counterpart of the sixteen-lane conversion.
; The <16 x i8> argument is converted with sitofp and the <16 x float> result
; is stored (align 64) through the sret pointer %agg.result.
; The assertion blocks are autogenerated, one per FileCheck prefix; each expects
; four conversions (xvcvsxwsp) and four 16-byte stores.
; NOTE(review): the big-endian block stores to offsets 16, 48, 0, 32 (in that
; order) and mixes zeroed and non-zeroed vperm operands, unlike the pwr9
; little-endian block -- confirm against a fresh run of the update script.
457define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>) %agg.result, <16 x i8> %a) local_unnamed_addr #3 {
458; CHECK-P8-LABEL: test16elt_signed:
459; CHECK-P8:       # %bb.0: # %entry
460; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
461; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_2@toc@ha
462; CHECK-P8-NEXT:    vspltisw v1, 12
463; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_0@toc@l
464; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_2@toc@l
465; CHECK-P8-NEXT:    lvx v3, 0, r4
466; CHECK-P8-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
467; CHECK-P8-NEXT:    lvx v4, 0, r5
468; CHECK-P8-NEXT:    addis r5, r2, .LCPI7_1@toc@ha
469; CHECK-P8-NEXT:    addi r4, r4, .LCPI7_3@toc@l
470; CHECK-P8-NEXT:    addi r5, r5, .LCPI7_1@toc@l
471; CHECK-P8-NEXT:    lvx v5, 0, r4
472; CHECK-P8-NEXT:    lvx v0, 0, r5
473; CHECK-P8-NEXT:    li r4, 48
474; CHECK-P8-NEXT:    li r5, 32
475; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
476; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
477; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
478; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
479; CHECK-P8-NEXT:    vadduwm v0, v1, v1
480; CHECK-P8-NEXT:    vslw v3, v3, v0
481; CHECK-P8-NEXT:    vslw v4, v4, v0
482; CHECK-P8-NEXT:    vslw v5, v5, v0
483; CHECK-P8-NEXT:    vslw v2, v2, v0
484; CHECK-P8-NEXT:    vsraw v3, v3, v0
485; CHECK-P8-NEXT:    vsraw v4, v4, v0
486; CHECK-P8-NEXT:    vsraw v5, v5, v0
487; CHECK-P8-NEXT:    vsraw v2, v2, v0
488; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
489; CHECK-P8-NEXT:    xvcvsxwsp v4, v4
490; CHECK-P8-NEXT:    xvcvsxwsp v5, v5
491; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
492; CHECK-P8-NEXT:    stvx v3, 0, r3
493; CHECK-P8-NEXT:    stvx v4, r3, r5
494; CHECK-P8-NEXT:    stvx v5, r3, r4
495; CHECK-P8-NEXT:    li r4, 16
496; CHECK-P8-NEXT:    stvx v2, r3, r4
497; CHECK-P8-NEXT:    blr
498;
499; CHECK-P9-LABEL: test16elt_signed:
500; CHECK-P9:       # %bb.0: # %entry
501; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
502; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_0@toc@l
503; CHECK-P9-NEXT:    lxvx v3, 0, r4
504; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
505; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_1@toc@l
506; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
507; CHECK-P9-NEXT:    vextsb2w v3, v3
508; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
509; CHECK-P9-NEXT:    lxvx v3, 0, r4
510; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
511; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_2@toc@l
512; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
513; CHECK-P9-NEXT:    stxv vs0, 0(r3)
514; CHECK-P9-NEXT:    vextsb2w v3, v3
515; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
516; CHECK-P9-NEXT:    lxvx v3, 0, r4
517; CHECK-P9-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
518; CHECK-P9-NEXT:    addi r4, r4, .LCPI7_3@toc@l
519; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
520; CHECK-P9-NEXT:    stxv vs1, 16(r3)
521; CHECK-P9-NEXT:    vextsb2w v3, v3
522; CHECK-P9-NEXT:    xvcvsxwsp vs2, v3
523; CHECK-P9-NEXT:    lxvx v3, 0, r4
524; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
525; CHECK-P9-NEXT:    stxv vs2, 32(r3)
526; CHECK-P9-NEXT:    vextsb2w v2, v2
527; CHECK-P9-NEXT:    xvcvsxwsp vs3, v2
528; CHECK-P9-NEXT:    stxv vs3, 48(r3)
529; CHECK-P9-NEXT:    blr
530;
531; CHECK-BE-LABEL: test16elt_signed:
532; CHECK-BE:       # %bb.0: # %entry
533; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_0@toc@ha
534; CHECK-BE-NEXT:    xxlxor v4, v4, v4
535; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_0@toc@l
536; CHECK-BE-NEXT:    lxvx v3, 0, r4
537; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_1@toc@ha
538; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_1@toc@l
539; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
540; CHECK-BE-NEXT:    vextsb2w v3, v3
541; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
542; CHECK-BE-NEXT:    lxvx v3, 0, r4
543; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_2@toc@ha
544; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_2@toc@l
545; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
546; CHECK-BE-NEXT:    stxv vs0, 16(r3)
547; CHECK-BE-NEXT:    vextsb2w v3, v3
548; CHECK-BE-NEXT:    xvcvsxwsp vs1, v3
549; CHECK-BE-NEXT:    lxvx v3, 0, r4
550; CHECK-BE-NEXT:    addis r4, r2, .LCPI7_3@toc@ha
551; CHECK-BE-NEXT:    addi r4, r4, .LCPI7_3@toc@l
552; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
553; CHECK-BE-NEXT:    stxv vs1, 48(r3)
554; CHECK-BE-NEXT:    vextsb2w v3, v3
555; CHECK-BE-NEXT:    xvcvsxwsp vs2, v3
556; CHECK-BE-NEXT:    lxvx v3, 0, r4
557; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
558; CHECK-BE-NEXT:    stxv vs2, 0(r3)
559; CHECK-BE-NEXT:    vextsb2w v2, v2
560; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
561; CHECK-BE-NEXT:    stxv vs3, 32(r3)
562; CHECK-BE-NEXT:    blr
; IR under test: <16 x i8> -> (sitofp) <16 x float>, stored to %agg.result.
563entry:
564  %0 = sitofp <16 x i8> %a to <16 x float>
565  store <16 x float> %0, <16 x float>* %agg.result, align 64
566  ret void
567}
568