; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE

; Unsigned conversion (fptoui) of <2 x float> to <2 x i32>. Both the argument
; and the result travel as a single i64 and are bitcast to/from the vector
; type inside the function.
define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mtvsrd f0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    xvcvspuxws vs0, v2
; CHECK-P8-NEXT:    xxswapd vs0, vs0
; CHECK-P8-NEXT:    mfvsrd r3, f0
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test2elt:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    mtvsrd f0, r3
; CHECK-P9-NEXT:    xxswapd v2, vs0
; CHECK-P9-NEXT:    xvcvspuxws vs0, v2
; CHECK-P9-NEXT:    mfvsrld r3, vs0
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test2elt:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mtvsrd f0, r3
; CHECK-BE-NEXT:    xvcvspuxws vs0, vs0
; CHECK-BE-NEXT:    mfvsrd r3, f0
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i64 %a.coerce to <2 x float>
  %1 = fptoui <2 x float> %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to i64
  ret i64 %2
}

; Unsigned conversion (fptoui) of a <4 x float> argument to a <4 x i32>
; result, both passed directly as vector values.
define <4 x i32> @test4elt(<4 x float> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    xvcvspuxws v2, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test4elt:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    xvcvspuxws v2, v2
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test4elt:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xvcvspuxws v2, v2
; CHECK-BE-NEXT:    blr
entry:
  %0 = fptoui <4 x float> %a to <4 x i32>
  ret <4 x i32> %0
}

; Unsigned conversion (fptoui) of <8 x float> to <8 x i32>. The input vector
; is loaded from the unnamed second pointer parameter and the converted
; vector is stored through the sret pointer %agg.result.
define void @test8elt(<8 x i32>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    li r5, 16
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    lvx v2, r4, r5
; CHECK-P8-NEXT:    xvcvspuxws v3, v3
; CHECK-P8-NEXT:    xvcvspuxws v2, v2
; CHECK-P8-NEXT:    stvx v3, 0, r3
; CHECK-P8-NEXT:    stvx v2, r3, r5
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test8elt:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv vs0, 16(r4)
; CHECK-P9-NEXT:    lxv vs1, 0(r4)
; CHECK-P9-NEXT:    xvcvspuxws vs1, vs1
; CHECK-P9-NEXT:    xvcvspuxws vs0, vs0
; CHECK-P9-NEXT:    stxv vs0, 16(r3)
; CHECK-P9-NEXT:    stxv vs1, 0(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test8elt:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs0, 16(r4)
; CHECK-BE-NEXT:    lxv vs1, 0(r4)
; CHECK-BE-NEXT:    xvcvspuxws vs1, vs1
; CHECK-BE-NEXT:    xvcvspuxws vs0, vs0
; CHECK-BE-NEXT:    stxv vs0, 16(r3)
; CHECK-BE-NEXT:    stxv vs1, 0(r3)
; CHECK-BE-NEXT:    blr
entry:
  ; %0 is the unnamed second parameter (the source pointer).
  %a = load <8 x float>, <8 x float>* %0, align 32
  %1 = fptoui <8 x float> %a to <8 x i32>
  store <8 x i32> %1, <8 x i32>* %agg.result, align 32
  ret void
}

; Unsigned conversion (fptoui) of <16 x float> to <16 x i32>. The input vector
; is loaded from the unnamed second pointer parameter and the converted
; vector is stored through the sret pointer %agg.result.
define void @test16elt(<16 x i32>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    li r5, 16
; CHECK-P8-NEXT:    li r6, 32
; CHECK-P8-NEXT:    li r7, 48
; CHECK-P8-NEXT:    lvx v5, 0, r4
; CHECK-P8-NEXT:    lvx v2, r4, r5
; CHECK-P8-NEXT:    lvx v3, r4, r6
; CHECK-P8-NEXT:    lvx v4, r4, r7
; CHECK-P8-NEXT:    xvcvspuxws v5, v5
; CHECK-P8-NEXT:    xvcvspuxws v2, v2
; CHECK-P8-NEXT:    xvcvspuxws v3, v3
; CHECK-P8-NEXT:    xvcvspuxws v4, v4
; CHECK-P8-NEXT:    stvx v5, 0, r3
; CHECK-P8-NEXT:    stvx v2, r3, r5
; CHECK-P8-NEXT:    stvx v3, r3, r6
; CHECK-P8-NEXT:    stvx v4, r3, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test16elt:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv vs0, 48(r4)
; CHECK-P9-NEXT:    lxv vs1, 32(r4)
; CHECK-P9-NEXT:    lxv vs2, 16(r4)
; CHECK-P9-NEXT:    lxv vs3, 0(r4)
; CHECK-P9-NEXT:    xvcvspuxws vs3, vs3
; CHECK-P9-NEXT:    xvcvspuxws vs2, vs2
; CHECK-P9-NEXT:    xvcvspuxws vs1, vs1
; CHECK-P9-NEXT:    xvcvspuxws vs0, vs0
; CHECK-P9-NEXT:    stxv vs0, 48(r3)
; CHECK-P9-NEXT:    stxv vs1, 32(r3)
; CHECK-P9-NEXT:    stxv vs2, 16(r3)
; CHECK-P9-NEXT:    stxv vs3, 0(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test16elt:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs0, 48(r4)
; CHECK-BE-NEXT:    lxv vs1, 32(r4)
; CHECK-BE-NEXT:    lxv vs2, 16(r4)
; CHECK-BE-NEXT:    lxv vs3, 0(r4)
; CHECK-BE-NEXT:    xvcvspuxws vs3, vs3
; CHECK-BE-NEXT:    xvcvspuxws vs2, vs2
; CHECK-BE-NEXT:    xvcvspuxws vs1, vs1
; CHECK-BE-NEXT:    xvcvspuxws vs0, vs0
; CHECK-BE-NEXT:    stxv vs0, 48(r3)
; CHECK-BE-NEXT:    stxv vs1, 32(r3)
; CHECK-BE-NEXT:    stxv vs2, 16(r3)
; CHECK-BE-NEXT:    stxv vs3, 0(r3)
; CHECK-BE-NEXT:    blr
entry:
  ; %0 is the unnamed second parameter (the source pointer).
  %a = load <16 x float>, <16 x float>* %0, align 64
  %1 = fptoui <16 x float> %a to <16 x i32>
  store <16 x i32> %1, <16 x i32>* %agg.result, align 64
  ret void
}

; Signed conversion (fptosi) of <2 x float> to <2 x i32>. Both the argument
; and the result travel as a single i64 and are bitcast to/from the vector
; type inside the function.
define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
; CHECK-P8-LABEL: test2elt_signed:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mtvsrd f0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    xvcvspsxws vs0, v2
; CHECK-P8-NEXT:    xxswapd vs0, vs0
; CHECK-P8-NEXT:    mfvsrd r3, f0
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test2elt_signed:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    mtvsrd f0, r3
; CHECK-P9-NEXT:    xxswapd v2, vs0
; CHECK-P9-NEXT:    xvcvspsxws vs0, v2
; CHECK-P9-NEXT:    mfvsrld r3, vs0
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test2elt_signed:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    mtvsrd f0, r3
; CHECK-BE-NEXT:    xvcvspsxws vs0, vs0
; CHECK-BE-NEXT:    mfvsrd r3, f0
; CHECK-BE-NEXT:    blr
entry:
  %0 = bitcast i64 %a.coerce to <2 x float>
  %1 = fptosi <2 x float> %0 to <2 x i32>
  %2 = bitcast <2 x i32> %1 to i64
  ret i64 %2
}

; Signed conversion (fptosi) of a <4 x float> argument to a <4 x i32> result,
; both passed directly as vector values.
define <4 x i32> @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 {
; CHECK-P8-LABEL: test4elt_signed:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    xvcvspsxws v2, v2
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test4elt_signed:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    xvcvspsxws v2, v2
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test4elt_signed:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xvcvspsxws v2, v2
; CHECK-BE-NEXT:    blr
entry:
  %0 = fptosi <4 x float> %a to <4 x i32>
  ret <4 x i32> %0
}

; Signed conversion (fptosi) of <8 x float> to <8 x i32>. The input vector is
; loaded from the unnamed second pointer parameter and the converted vector
; is stored through the sret pointer %agg.result.
define void @test8elt_signed(<8 x i32>* noalias nocapture sret %agg.result, <8 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test8elt_signed:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    li r5, 16
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    lvx v2, r4, r5
; CHECK-P8-NEXT:    xvcvspsxws v3, v3
; CHECK-P8-NEXT:    xvcvspsxws v2, v2
; CHECK-P8-NEXT:    stvx v3, 0, r3
; CHECK-P8-NEXT:    stvx v2, r3, r5
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test8elt_signed:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv vs0, 16(r4)
; CHECK-P9-NEXT:    lxv vs1, 0(r4)
; CHECK-P9-NEXT:    xvcvspsxws vs1, vs1
; CHECK-P9-NEXT:    xvcvspsxws vs0, vs0
; CHECK-P9-NEXT:    stxv vs0, 16(r3)
; CHECK-P9-NEXT:    stxv vs1, 0(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test8elt_signed:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs0, 16(r4)
; CHECK-BE-NEXT:    lxv vs1, 0(r4)
; CHECK-BE-NEXT:    xvcvspsxws vs1, vs1
; CHECK-BE-NEXT:    xvcvspsxws vs0, vs0
; CHECK-BE-NEXT:    stxv vs0, 16(r3)
; CHECK-BE-NEXT:    stxv vs1, 0(r3)
; CHECK-BE-NEXT:    blr
entry:
  ; %0 is the unnamed second parameter (the source pointer).
  %a = load <8 x float>, <8 x float>* %0, align 32
  %1 = fptosi <8 x float> %a to <8 x i32>
  store <8 x i32> %1, <8 x i32>* %agg.result, align 32
  ret void
}

; Signed conversion (fptosi) of <16 x float> to <16 x i32>. The input vector
; is loaded from the unnamed second pointer parameter and the converted
; vector is stored through the sret pointer %agg.result.
define void @test16elt_signed(<16 x i32>* noalias nocapture sret %agg.result, <16 x float>* nocapture readonly) local_unnamed_addr #2 {
; CHECK-P8-LABEL: test16elt_signed:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    li r5, 16
; CHECK-P8-NEXT:    li r6, 32
; CHECK-P8-NEXT:    li r7, 48
; CHECK-P8-NEXT:    lvx v5, 0, r4
; CHECK-P8-NEXT:    lvx v2, r4, r5
; CHECK-P8-NEXT:    lvx v3, r4, r6
; CHECK-P8-NEXT:    lvx v4, r4, r7
; CHECK-P8-NEXT:    xvcvspsxws v5, v5
; CHECK-P8-NEXT:    xvcvspsxws v2, v2
; CHECK-P8-NEXT:    xvcvspsxws v3, v3
; CHECK-P8-NEXT:    xvcvspsxws v4, v4
; CHECK-P8-NEXT:    stvx v5, 0, r3
; CHECK-P8-NEXT:    stvx v2, r3, r5
; CHECK-P8-NEXT:    stvx v3, r3, r6
; CHECK-P8-NEXT:    stvx v4, r3, r7
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test16elt_signed:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv vs0, 48(r4)
; CHECK-P9-NEXT:    lxv vs1, 32(r4)
; CHECK-P9-NEXT:    lxv vs2, 16(r4)
; CHECK-P9-NEXT:    lxv vs3, 0(r4)
; CHECK-P9-NEXT:    xvcvspsxws vs3, vs3
; CHECK-P9-NEXT:    xvcvspsxws vs2, vs2
; CHECK-P9-NEXT:    xvcvspsxws vs1, vs1
; CHECK-P9-NEXT:    xvcvspsxws vs0, vs0
; CHECK-P9-NEXT:    stxv vs0, 48(r3)
; CHECK-P9-NEXT:    stxv vs1, 32(r3)
; CHECK-P9-NEXT:    stxv vs2, 16(r3)
; CHECK-P9-NEXT:    stxv vs3, 0(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test16elt_signed:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv vs0, 48(r4)
; CHECK-BE-NEXT:    lxv vs1, 32(r4)
; CHECK-BE-NEXT:    lxv vs2, 16(r4)
; CHECK-BE-NEXT:    lxv vs3, 0(r4)
; CHECK-BE-NEXT:    xvcvspsxws vs3, vs3
; CHECK-BE-NEXT:    xvcvspsxws vs2, vs2
; CHECK-BE-NEXT:    xvcvspsxws vs1, vs1
; CHECK-BE-NEXT:    xvcvspsxws vs0, vs0
; CHECK-BE-NEXT:    stxv vs0, 48(r3)
; CHECK-BE-NEXT:    stxv vs1, 32(r3)
; CHECK-BE-NEXT:    stxv vs2, 16(r3)
; CHECK-BE-NEXT:    stxv vs3, 0(r3)
; CHECK-BE-NEXT:    blr
entry:
  ; %0 is the unnamed second parameter (the source pointer).
  %a = load <16 x float>, <16 x float>* %0, align 64
  %1 = fptosi <16 x float> %a to <16 x i32>
  store <16 x i32> %1, <16 x i32>* %agg.result, align 64
  ret void
}
