1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4; RUN: FileCheck %s --check-prefix=CHECK-P8
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
6; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
7; RUN: FileCheck %s --check-prefix=CHECK-P9
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
9; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
10; RUN: FileCheck %s --check-prefix=CHECK-BE
11
; test8: load a <8 x i16> vector from %SrcPtr, convert every element to double
; with uitofp, and store the <8 x double> result to %Sink.
; The autogenerated assertions below pin the llc output for the three llc
; invocations at the top of this file (pwr8 little-endian, pwr9 little-endian,
; pwr9 big-endian). In each of them the expected code zeroes a vector register
; with xxlxor and then emits four vperm/xvcvuxddp pairs, one per 16-byte store.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
12define void @test8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) {
13; CHECK-P8-LABEL: test8:
14; CHECK-P8:       # %bb.0: # %entry
15; CHECK-P8-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
16; CHECK-P8-NEXT:    addis r6, r2, .LCPI0_2@toc@ha
17; CHECK-P8-NEXT:    lvx v3, 0, r4
18; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_1@toc@ha
19; CHECK-P8-NEXT:    xxlxor v4, v4, v4
20; CHECK-P8-NEXT:    addi r5, r5, .LCPI0_0@toc@l
21; CHECK-P8-NEXT:    addi r6, r6, .LCPI0_2@toc@l
22; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_1@toc@l
23; CHECK-P8-NEXT:    lvx v2, 0, r5
24; CHECK-P8-NEXT:    addis r5, r2, .LCPI0_3@toc@ha
25; CHECK-P8-NEXT:    lvx v5, 0, r6
26; CHECK-P8-NEXT:    lvx v1, 0, r4
27; CHECK-P8-NEXT:    li r4, 48
28; CHECK-P8-NEXT:    addi r5, r5, .LCPI0_3@toc@l
29; CHECK-P8-NEXT:    lvx v0, 0, r5
30; CHECK-P8-NEXT:    li r5, 32
31; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
32; CHECK-P8-NEXT:    vperm v5, v4, v3, v5
33; CHECK-P8-NEXT:    vperm v0, v4, v3, v0
34; CHECK-P8-NEXT:    vperm v3, v4, v3, v1
35; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
36; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
37; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
38; CHECK-P8-NEXT:    xvcvuxddp vs3, v3
39; CHECK-P8-NEXT:    xxswapd vs0, vs0
40; CHECK-P8-NEXT:    xxswapd vs1, vs1
41; CHECK-P8-NEXT:    xxswapd vs2, vs2
42; CHECK-P8-NEXT:    xxswapd vs3, vs3
43; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
44; CHECK-P8-NEXT:    li r4, 16
45; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
46; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
47; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
48; CHECK-P8-NEXT:    blr
49;
50; CHECK-P9-LABEL: test8:
51; CHECK-P9:       # %bb.0: # %entry
52; CHECK-P9-NEXT:    lxv v2, 0(r4)
53; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
54; CHECK-P9-NEXT:    xxlxor v4, v4, v4
55; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_0@toc@l
56; CHECK-P9-NEXT:    lxvx v3, 0, r4
57; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_1@toc@ha
58; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_1@toc@l
59; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
60; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
61; CHECK-P9-NEXT:    lxvx v3, 0, r4
62; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_2@toc@ha
63; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_2@toc@l
64; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
65; CHECK-P9-NEXT:    stxv vs0, 0(r3)
66; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
67; CHECK-P9-NEXT:    lxvx v3, 0, r4
68; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_3@toc@ha
69; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_3@toc@l
70; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
71; CHECK-P9-NEXT:    stxv vs1, 16(r3)
72; CHECK-P9-NEXT:    xvcvuxddp vs2, v3
73; CHECK-P9-NEXT:    lxvx v3, 0, r4
74; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
75; CHECK-P9-NEXT:    stxv vs2, 32(r3)
76; CHECK-P9-NEXT:    xvcvuxddp vs3, v2
77; CHECK-P9-NEXT:    stxv vs3, 48(r3)
78; CHECK-P9-NEXT:    blr
79;
80; CHECK-BE-LABEL: test8:
81; CHECK-BE:       # %bb.0: # %entry
82; CHECK-BE-NEXT:    lxv v2, 0(r4)
83; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
84; CHECK-BE-NEXT:    xxlxor v4, v4, v4
85; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_0@toc@l
86; CHECK-BE-NEXT:    lxvx v3, 0, r4
87; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_1@toc@ha
88; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_1@toc@l
89; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
90; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
91; CHECK-BE-NEXT:    lxvx v3, 0, r4
92; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_2@toc@ha
93; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_2@toc@l
94; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
95; CHECK-BE-NEXT:    stxv vs0, 0(r3)
96; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
97; CHECK-BE-NEXT:    lxvx v3, 0, r4
98; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_3@toc@ha
99; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_3@toc@l
100; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
101; CHECK-BE-NEXT:    stxv vs1, 16(r3)
102; CHECK-BE-NEXT:    xvcvuxddp vs2, v3
103; CHECK-BE-NEXT:    lxvx v3, 0, r4
104; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
105; CHECK-BE-NEXT:    stxv vs2, 32(r3)
106; CHECK-BE-NEXT:    xvcvuxddp vs3, v2
107; CHECK-BE-NEXT:    stxv vs3, 48(r3)
108; CHECK-BE-NEXT:    blr
109entry:
110  %0 = load <8 x i16>, <8 x i16>* %SrcPtr, align 16
; Unsigned i16 -> double conversion whose lowering is checked above.
111  %1 = uitofp <8 x i16> %0 to <8 x double>
112  store <8 x double> %1, <8 x double>* %Sink, align 16
113  ret void
114}
115
; test4: load a <4 x i16> vector from %SrcPtr, convert every element to double
; with uitofp, and store the <4 x double> result to %Sink.
; The autogenerated assertions below expect, for each of the three llc
; invocations at the top of this file, a single full-vector load of the source
; (lvx or lxv), a zeroed register (xxlxor), and two vperm/xvcvuxddp pairs
; feeding two 16-byte stores.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
116define void @test4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) {
117; CHECK-P8-LABEL: test4:
118; CHECK-P8:       # %bb.0: # %entry
119; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
120; CHECK-P8-NEXT:    addis r6, r2, .LCPI1_1@toc@ha
121; CHECK-P8-NEXT:    lvx v3, 0, r4
122; CHECK-P8-NEXT:    xxlxor v4, v4, v4
123; CHECK-P8-NEXT:    li r4, 16
124; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0@toc@l
125; CHECK-P8-NEXT:    lvx v2, 0, r5
126; CHECK-P8-NEXT:    addi r5, r6, .LCPI1_1@toc@l
127; CHECK-P8-NEXT:    lvx v5, 0, r5
128; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
129; CHECK-P8-NEXT:    vperm v3, v4, v3, v5
130; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
131; CHECK-P8-NEXT:    xvcvuxddp vs1, v3
132; CHECK-P8-NEXT:    xxswapd vs0, vs0
133; CHECK-P8-NEXT:    xxswapd vs1, vs1
134; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
135; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
136; CHECK-P8-NEXT:    blr
137;
138; CHECK-P9-LABEL: test4:
139; CHECK-P9:       # %bb.0: # %entry
140; CHECK-P9-NEXT:    lxv v2, 0(r4)
141; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
142; CHECK-P9-NEXT:    xxlxor v4, v4, v4
143; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_0@toc@l
144; CHECK-P9-NEXT:    lxvx v3, 0, r4
145; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
146; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_1@toc@l
147; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
148; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
149; CHECK-P9-NEXT:    lxvx v3, 0, r4
150; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
151; CHECK-P9-NEXT:    stxv vs0, 0(r3)
152; CHECK-P9-NEXT:    xvcvuxddp vs1, v2
153; CHECK-P9-NEXT:    stxv vs1, 16(r3)
154; CHECK-P9-NEXT:    blr
155;
156; CHECK-BE-LABEL: test4:
157; CHECK-BE:       # %bb.0: # %entry
158; CHECK-BE-NEXT:    lxv v2, 0(r4)
159; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
160; CHECK-BE-NEXT:    xxlxor v4, v4, v4
161; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_0@toc@l
162; CHECK-BE-NEXT:    lxvx v3, 0, r4
163; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
164; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_1@toc@l
165; CHECK-BE-NEXT:    vperm v3, v2, v4, v3
166; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
167; CHECK-BE-NEXT:    lxvx v3, 0, r4
168; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
169; CHECK-BE-NEXT:    stxv vs0, 0(r3)
170; CHECK-BE-NEXT:    xvcvuxddp vs1, v2
171; CHECK-BE-NEXT:    stxv vs1, 16(r3)
172; CHECK-BE-NEXT:    blr
173entry:
174  %0 = load <4 x i16>, <4 x i16>* %SrcPtr, align 16
; Unsigned i16 -> double conversion whose lowering is checked above.
175  %1 = uitofp <4 x i16> %0 to <4 x double>
176  store <4 x double> %1, <4 x double>* %Sink, align 16
177  ret void
178}
179
; test2: load a <2 x i16> vector from %SrcPtr, convert both elements to double
; with uitofp, and store the <2 x double> result to %Sink.
; The autogenerated assertions below expect, for each of the three llc
; invocations at the top of this file, a single full-vector load of the source
; (lvx or lxv), a zeroed register (xxlxor), one vperm, one xvcvuxddp and one
; 16-byte store.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
180define void @test2(<2 x double>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) {
181; CHECK-P8-LABEL: test2:
182; CHECK-P8:       # %bb.0: # %entry
183; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
184; CHECK-P8-NEXT:    lvx v3, 0, r4
185; CHECK-P8-NEXT:    xxlxor v4, v4, v4
186; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
187; CHECK-P8-NEXT:    lvx v2, 0, r5
188; CHECK-P8-NEXT:    vperm v2, v4, v3, v2
189; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
190; CHECK-P8-NEXT:    xxswapd vs0, vs0
191; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
192; CHECK-P8-NEXT:    blr
193;
194; CHECK-P9-LABEL: test2:
195; CHECK-P9:       # %bb.0: # %entry
196; CHECK-P9-NEXT:    lxv v2, 0(r4)
197; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
198; CHECK-P9-NEXT:    xxlxor v4, v4, v4
199; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0@toc@l
200; CHECK-P9-NEXT:    lxvx v3, 0, r4
201; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
202; CHECK-P9-NEXT:    xvcvuxddp vs0, v2
203; CHECK-P9-NEXT:    stxv vs0, 0(r3)
204; CHECK-P9-NEXT:    blr
205;
206; CHECK-BE-LABEL: test2:
207; CHECK-BE:       # %bb.0: # %entry
208; CHECK-BE-NEXT:    lxv v2, 0(r4)
209; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
210; CHECK-BE-NEXT:    xxlxor v4, v4, v4
211; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
212; CHECK-BE-NEXT:    lxvx v3, 0, r4
213; CHECK-BE-NEXT:    vperm v2, v2, v4, v3
214; CHECK-BE-NEXT:    xvcvuxddp vs0, v2
215; CHECK-BE-NEXT:    stxv vs0, 0(r3)
216; CHECK-BE-NEXT:    blr
217entry:
218  %0 = load <2 x i16>, <2 x i16>* %SrcPtr, align 16
; Unsigned i16 -> double conversion whose lowering is checked above.
219  %1 = uitofp <2 x i16> %0 to <2 x double>
220  store <2 x double> %1, <2 x double>* %Sink, align 16
221  ret void
222}
223
; stest8: load a <8 x i16> vector from %SrcPtr, convert every element to double
; with sitofp, and store the <8 x double> result to %Sink.
; The autogenerated assertions below pin the llc output for the three llc
; invocations at the top of this file. The pwr8 sequence is expected to follow
; each vperm with a vsld/vsrad pair before xvcvsxddp; both pwr9 sequences are
; expected to use vextsh2d between vperm and xvcvsxddp instead.
; NOTE(review): the big-endian assertions store the four results at offsets
; 16, 48, 0, 32 and mix zero-vector permutes with self permutes, unlike the
; little-endian pwr9 sequence (offsets 0, 16, 32, 48) - confirm this is the
; intended codegen when regenerating.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
224define void @stest8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) {
225; CHECK-P8-LABEL: stest8:
226; CHECK-P8:       # %bb.0: # %entry
227; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
228; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
229; CHECK-P8-NEXT:    lvx v3, 0, r4
230; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
231; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0@toc@l
232; CHECK-P8-NEXT:    addi r6, r6, .LCPI3_2@toc@l
233; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1@toc@l
234; CHECK-P8-NEXT:    lvx v2, 0, r5
235; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_3@toc@ha
236; CHECK-P8-NEXT:    lvx v4, 0, r6
237; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_4@toc@ha
238; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
239; CHECK-P8-NEXT:    li r4, 48
240; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_3@toc@l
241; CHECK-P8-NEXT:    lvx v5, 0, r5
242; CHECK-P8-NEXT:    addi r5, r6, .LCPI3_4@toc@l
243; CHECK-P8-NEXT:    lvx v0, 0, r5
244; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
245; CHECK-P8-NEXT:    li r5, 32
246; CHECK-P8-NEXT:    vperm v4, v3, v3, v4
247; CHECK-P8-NEXT:    vperm v5, v3, v3, v5
248; CHECK-P8-NEXT:    vperm v3, v3, v3, v0
249; CHECK-P8-NEXT:    xxswapd v0, vs0
250; CHECK-P8-NEXT:    vsld v2, v2, v0
251; CHECK-P8-NEXT:    vsld v4, v4, v0
252; CHECK-P8-NEXT:    vsld v5, v5, v0
253; CHECK-P8-NEXT:    vsld v3, v3, v0
254; CHECK-P8-NEXT:    vsrad v2, v2, v0
255; CHECK-P8-NEXT:    vsrad v3, v3, v0
256; CHECK-P8-NEXT:    vsrad v4, v4, v0
257; CHECK-P8-NEXT:    vsrad v5, v5, v0
258; CHECK-P8-NEXT:    xvcvsxddp vs2, v3
259; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
260; CHECK-P8-NEXT:    xvcvsxddp vs1, v5
261; CHECK-P8-NEXT:    xvcvsxddp vs3, v4
262; CHECK-P8-NEXT:    xxswapd vs2, vs2
263; CHECK-P8-NEXT:    xxswapd vs0, vs0
264; CHECK-P8-NEXT:    xxswapd vs1, vs1
265; CHECK-P8-NEXT:    xxswapd vs3, vs3
266; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
267; CHECK-P8-NEXT:    li r4, 16
268; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
269; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
270; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
271; CHECK-P8-NEXT:    blr
272;
273; CHECK-P9-LABEL: stest8:
274; CHECK-P9:       # %bb.0: # %entry
275; CHECK-P9-NEXT:    lxv v2, 0(r4)
276; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
277; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0@toc@l
278; CHECK-P9-NEXT:    lxvx v3, 0, r4
279; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
280; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1@toc@l
281; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
282; CHECK-P9-NEXT:    vextsh2d v3, v3
283; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
284; CHECK-P9-NEXT:    lxvx v3, 0, r4
285; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
286; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_2@toc@l
287; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
288; CHECK-P9-NEXT:    stxv vs0, 0(r3)
289; CHECK-P9-NEXT:    vextsh2d v3, v3
290; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
291; CHECK-P9-NEXT:    lxvx v3, 0, r4
292; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
293; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_3@toc@l
294; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
295; CHECK-P9-NEXT:    stxv vs1, 16(r3)
296; CHECK-P9-NEXT:    vextsh2d v3, v3
297; CHECK-P9-NEXT:    xvcvsxddp vs2, v3
298; CHECK-P9-NEXT:    lxvx v3, 0, r4
299; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
300; CHECK-P9-NEXT:    stxv vs2, 32(r3)
301; CHECK-P9-NEXT:    vextsh2d v2, v2
302; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
303; CHECK-P9-NEXT:    stxv vs3, 48(r3)
304; CHECK-P9-NEXT:    blr
305;
306; CHECK-BE-LABEL: stest8:
307; CHECK-BE:       # %bb.0: # %entry
308; CHECK-BE-NEXT:    lxv v2, 0(r4)
309; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
310; CHECK-BE-NEXT:    xxlxor v4, v4, v4
311; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
312; CHECK-BE-NEXT:    lxvx v3, 0, r4
313; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
314; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1@toc@l
315; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
316; CHECK-BE-NEXT:    vextsh2d v3, v3
317; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
318; CHECK-BE-NEXT:    lxvx v3, 0, r4
319; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
320; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_2@toc@l
321; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
322; CHECK-BE-NEXT:    stxv vs0, 16(r3)
323; CHECK-BE-NEXT:    vextsh2d v3, v3
324; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
325; CHECK-BE-NEXT:    lxvx v3, 0, r4
326; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
327; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_3@toc@l
328; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
329; CHECK-BE-NEXT:    stxv vs1, 48(r3)
330; CHECK-BE-NEXT:    vextsh2d v3, v3
331; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
332; CHECK-BE-NEXT:    lxvx v3, 0, r4
333; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
334; CHECK-BE-NEXT:    stxv vs2, 0(r3)
335; CHECK-BE-NEXT:    vextsh2d v2, v2
336; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
337; CHECK-BE-NEXT:    stxv vs3, 32(r3)
338; CHECK-BE-NEXT:    blr
339entry:
340  %0 = load <8 x i16>, <8 x i16>* %SrcPtr, align 16
; Signed i16 -> double conversion whose lowering is checked above.
341  %1 = sitofp <8 x i16> %0 to <8 x double>
342  store <8 x double> %1, <8 x double>* %Sink, align 16
343  ret void
344}
345
; stest4: load a <4 x i16> vector from %SrcPtr, convert every element to double
; with sitofp, and store the <4 x double> result to %Sink.
; The autogenerated assertions below pin the llc output for the three llc
; invocations at the top of this file. The pwr8 sequence is expected to follow
; each vperm with a vsld/vsrad pair before xvcvsxddp; both pwr9 sequences are
; expected to use vextsh2d between vperm and xvcvsxddp instead.
; NOTE(review): the big-endian assertions store the first converted vector at
; offset 16 and the second at offset 0, and use a zero-vector permute only for
; the first one, unlike the little-endian pwr9 sequence - confirm this is the
; intended codegen when regenerating.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
346define void @stest4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) {
347; CHECK-P8-LABEL: stest4:
348; CHECK-P8:       # %bb.0: # %entry
349; CHECK-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
350; CHECK-P8-NEXT:    addis r6, r2, .LCPI4_2@toc@ha
351; CHECK-P8-NEXT:    lvx v3, 0, r4
352; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
353; CHECK-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
354; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_1@toc@l
355; CHECK-P8-NEXT:    lvx v2, 0, r5
356; CHECK-P8-NEXT:    addi r5, r6, .LCPI4_2@toc@l
357; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
358; CHECK-P8-NEXT:    li r4, 16
359; CHECK-P8-NEXT:    lvx v4, 0, r5
360; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
361; CHECK-P8-NEXT:    vperm v3, v3, v3, v4
362; CHECK-P8-NEXT:    xxswapd v4, vs0
363; CHECK-P8-NEXT:    vsld v2, v2, v4
364; CHECK-P8-NEXT:    vsld v3, v3, v4
365; CHECK-P8-NEXT:    vsrad v2, v2, v4
366; CHECK-P8-NEXT:    vsrad v3, v3, v4
367; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
368; CHECK-P8-NEXT:    xvcvsxddp vs1, v3
369; CHECK-P8-NEXT:    xxswapd vs0, vs0
370; CHECK-P8-NEXT:    xxswapd vs1, vs1
371; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
372; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
373; CHECK-P8-NEXT:    blr
374;
375; CHECK-P9-LABEL: stest4:
376; CHECK-P9:       # %bb.0: # %entry
377; CHECK-P9-NEXT:    lxv v2, 0(r4)
378; CHECK-P9-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
379; CHECK-P9-NEXT:    addi r4, r4, .LCPI4_0@toc@l
380; CHECK-P9-NEXT:    lxvx v3, 0, r4
381; CHECK-P9-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
382; CHECK-P9-NEXT:    addi r4, r4, .LCPI4_1@toc@l
383; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
384; CHECK-P9-NEXT:    vextsh2d v3, v3
385; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
386; CHECK-P9-NEXT:    lxvx v3, 0, r4
387; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
388; CHECK-P9-NEXT:    stxv vs0, 0(r3)
389; CHECK-P9-NEXT:    vextsh2d v2, v2
390; CHECK-P9-NEXT:    xvcvsxddp vs1, v2
391; CHECK-P9-NEXT:    stxv vs1, 16(r3)
392; CHECK-P9-NEXT:    blr
393;
394; CHECK-BE-LABEL: stest4:
395; CHECK-BE:       # %bb.0: # %entry
396; CHECK-BE-NEXT:    lxv v2, 0(r4)
397; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
398; CHECK-BE-NEXT:    xxlxor v3, v3, v3
399; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_0@toc@l
400; CHECK-BE-NEXT:    lxvx v4, 0, r4
401; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
402; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_1@toc@l
403; CHECK-BE-NEXT:    vperm v3, v3, v2, v4
404; CHECK-BE-NEXT:    vextsh2d v3, v3
405; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
406; CHECK-BE-NEXT:    lxvx v3, 0, r4
407; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
408; CHECK-BE-NEXT:    stxv vs0, 16(r3)
409; CHECK-BE-NEXT:    vextsh2d v2, v2
410; CHECK-BE-NEXT:    xvcvsxddp vs1, v2
411; CHECK-BE-NEXT:    stxv vs1, 0(r3)
412; CHECK-BE-NEXT:    blr
413entry:
414  %0 = load <4 x i16>, <4 x i16>* %SrcPtr, align 16
; Signed i16 -> double conversion whose lowering is checked above.
415  %1 = sitofp <4 x i16> %0 to <4 x double>
416  store <4 x double> %1, <4 x double>* %Sink, align 16
417  ret void
418}
419
; stest2: load a <2 x i16> vector from %SrcPtr, convert both elements to double
; with sitofp, and store the <2 x double> result to %Sink.
; The autogenerated assertions below pin the llc output for the three llc
; invocations at the top of this file. The pwr8 sequence is expected to be
; vperm, vsld, vsrad, xvcvsxddp; both pwr9 sequences are expected to be
; vperm, vextsh2d, xvcvsxddp, followed by one 16-byte store.
; Do not hand-edit the assertion lines; regenerate them with
; utils/update_llc_test_checks.py.
420define void @stest2(<2 x double>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) {
421; CHECK-P8-LABEL: stest2:
422; CHECK-P8:       # %bb.0: # %entry
423; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
424; CHECK-P8-NEXT:    lvx v3, 0, r4
425; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1@toc@ha
426; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
427; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1@toc@l
428; CHECK-P8-NEXT:    lvx v2, 0, r5
429; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
430; CHECK-P8-NEXT:    vperm v2, v3, v3, v2
431; CHECK-P8-NEXT:    xxswapd v3, vs0
432; CHECK-P8-NEXT:    vsld v2, v2, v3
433; CHECK-P8-NEXT:    vsrad v2, v2, v3
434; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
435; CHECK-P8-NEXT:    xxswapd vs0, vs0
436; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
437; CHECK-P8-NEXT:    blr
438;
439; CHECK-P9-LABEL: stest2:
440; CHECK-P9:       # %bb.0: # %entry
441; CHECK-P9-NEXT:    lxv v2, 0(r4)
442; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
443; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_0@toc@l
444; CHECK-P9-NEXT:    lxvx v3, 0, r4
445; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
446; CHECK-P9-NEXT:    vextsh2d v2, v2
447; CHECK-P9-NEXT:    xvcvsxddp vs0, v2
448; CHECK-P9-NEXT:    stxv vs0, 0(r3)
449; CHECK-P9-NEXT:    blr
450;
451; CHECK-BE-LABEL: stest2:
452; CHECK-BE:       # %bb.0: # %entry
453; CHECK-BE-NEXT:    lxv v2, 0(r4)
454; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
455; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0@toc@l
456; CHECK-BE-NEXT:    lxvx v3, 0, r4
457; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
458; CHECK-BE-NEXT:    vextsh2d v2, v2
459; CHECK-BE-NEXT:    xvcvsxddp vs0, v2
460; CHECK-BE-NEXT:    stxv vs0, 0(r3)
461; CHECK-BE-NEXT:    blr
462entry:
463  %0 = load <2 x i16>, <2 x i16>* %SrcPtr, align 16
; Signed i16 -> double conversion whose lowering is checked above.
464  %1 = sitofp <2 x i16> %0 to <2 x double>
465  store <2 x double> %1, <2 x double>* %Sink, align 16
466  ret void
467}
468