1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4; RUN: FileCheck %s --check-prefix=CHECK-P8
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
6; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
7; RUN: FileCheck %s --check-prefix=CHECK-P9
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
9; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
10; RUN: FileCheck %s --check-prefix=CHECK-BE
11
; test2elt: reinterprets the i64 argument as <2 x i32> and converts both lanes
; to double with an unsigned conversion (uitofp), returning <2 x double>.
; The expected assembly below performs the conversion with xvcvuxwdp.
; The assertion lines are autogenerated (see the NOTE on the first line of the
; file); regenerate them with the update script rather than editing by hand.
12define <2 x double> @test2elt(i64 %a.coerce) local_unnamed_addr #0 {
13; CHECK-P8-LABEL: test2elt:
14; CHECK-P8:       # %bb.0: # %entry
15; CHECK-P8-NEXT:    mtfprd f0, r3
16; CHECK-P8-NEXT:    xxswapd v2, vs0
17; CHECK-P8-NEXT:    xxmrglw v2, v2, v2
18; CHECK-P8-NEXT:    xvcvuxwdp v2, v2
19; CHECK-P8-NEXT:    blr
20;
21; CHECK-P9-LABEL: test2elt:
22; CHECK-P9:       # %bb.0: # %entry
23; CHECK-P9-NEXT:    mtfprd f0, r3
24; CHECK-P9-NEXT:    xxswapd v2, vs0
25; CHECK-P9-NEXT:    xxmrglw v2, v2, v2
26; CHECK-P9-NEXT:    xvcvuxwdp v2, v2
27; CHECK-P9-NEXT:    blr
28;
29; CHECK-BE-LABEL: test2elt:
30; CHECK-BE:       # %bb.0: # %entry
31; CHECK-BE-NEXT:    mtfprd f0, r3
32; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
33; CHECK-BE-NEXT:    xvcvuxwdp v2, v2
34; CHECK-BE-NEXT:    blr
35entry:
  ; The two i32 lanes arrive packed in a single 64-bit integer argument.
36  %0 = bitcast i64 %a.coerce to <2 x i32>
37  %1 = uitofp <2 x i32> %0 to <2 x double>
38  ret <2 x double> %1
39}
40
; test4elt: unsigned conversion (uitofp) of the <4 x i32> argument %a to
; <4 x double>. The result is not returned by value; it is stored through the
; sret pointer %agg.result with 32-byte alignment.
; The expected assembly below splits the input with xxmrglw/xxmrghw and
; converts each half with xvcvuxwdp. The assertion lines are autogenerated;
; regenerate them with the update script rather than editing by hand.
41define void @test4elt(<4 x double>* noalias nocapture sret(<4 x double>) %agg.result, <4 x i32> %a) local_unnamed_addr #1 {
42; CHECK-P8-LABEL: test4elt:
43; CHECK-P8:       # %bb.0: # %entry
44; CHECK-P8-NEXT:    xxmrglw v3, v2, v2
45; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
46; CHECK-P8-NEXT:    li r4, 16
47; CHECK-P8-NEXT:    xvcvuxwdp vs0, v3
48; CHECK-P8-NEXT:    xvcvuxwdp vs1, v2
49; CHECK-P8-NEXT:    xxswapd vs0, vs0
50; CHECK-P8-NEXT:    xxswapd vs1, vs1
51; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
52; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
53; CHECK-P8-NEXT:    blr
54;
55; CHECK-P9-LABEL: test4elt:
56; CHECK-P9:       # %bb.0: # %entry
57; CHECK-P9-NEXT:    xxmrglw v3, v2, v2
58; CHECK-P9-NEXT:    xxmrghw v2, v2, v2
59; CHECK-P9-NEXT:    xvcvuxwdp vs0, v3
60; CHECK-P9-NEXT:    xvcvuxwdp vs1, v2
61; CHECK-P9-NEXT:    stxv vs1, 16(r3)
62; CHECK-P9-NEXT:    stxv vs0, 0(r3)
63; CHECK-P9-NEXT:    blr
64;
65; CHECK-BE-LABEL: test4elt:
66; CHECK-BE:       # %bb.0: # %entry
67; CHECK-BE-NEXT:    xxmrghw v3, v2, v2
68; CHECK-BE-NEXT:    xxmrglw v2, v2, v2
69; CHECK-BE-NEXT:    xvcvuxwdp vs0, v3
70; CHECK-BE-NEXT:    xvcvuxwdp vs1, v2
71; CHECK-BE-NEXT:    stxv vs1, 16(r3)
72; CHECK-BE-NEXT:    stxv vs0, 0(r3)
73; CHECK-BE-NEXT:    blr
74entry:
75  %0 = uitofp <4 x i32> %a to <4 x double>
76  store <4 x double> %0, <4 x double>* %agg.result, align 32
77  ret void
78}
79
; test8elt: loads an <8 x i32> vector from the unnamed pointer parameter,
; converts it to <8 x double> with an unsigned conversion (uitofp), and stores
; the result through the sret pointer %agg.result with 64-byte alignment.
; The expected assembly below converts the input in four pieces, each with
; xvcvuxwdp. The assertion lines are autogenerated; regenerate them with the
; update script rather than editing by hand.
80define void @test8elt(<8 x double>* noalias nocapture sret(<8 x double>) %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 {
81; CHECK-P8-LABEL: test8elt:
82; CHECK-P8:       # %bb.0: # %entry
83; CHECK-P8-NEXT:    li r5, 16
84; CHECK-P8-NEXT:    lvx v3, 0, r4
85; CHECK-P8-NEXT:    li r6, 32
86; CHECK-P8-NEXT:    lvx v2, r4, r5
87; CHECK-P8-NEXT:    li r4, 48
88; CHECK-P8-NEXT:    xxmrglw v5, v3, v3
89; CHECK-P8-NEXT:    xxmrghw v3, v3, v3
90; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
91; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
92; CHECK-P8-NEXT:    xvcvuxwdp vs2, v5
93; CHECK-P8-NEXT:    xvcvuxwdp vs0, v4
94; CHECK-P8-NEXT:    xvcvuxwdp vs1, v2
95; CHECK-P8-NEXT:    xvcvuxwdp vs3, v3
96; CHECK-P8-NEXT:    xxswapd vs2, vs2
97; CHECK-P8-NEXT:    xxswapd vs0, vs0
98; CHECK-P8-NEXT:    xxswapd vs1, vs1
99; CHECK-P8-NEXT:    xxswapd vs3, vs3
100; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
101; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
102; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
103; CHECK-P8-NEXT:    stxvd2x vs2, 0, r3
104; CHECK-P8-NEXT:    blr
105;
106; CHECK-P9-LABEL: test8elt:
107; CHECK-P9:       # %bb.0: # %entry
108; CHECK-P9-NEXT:    lxv vs1, 0(r4)
109; CHECK-P9-NEXT:    lxv vs0, 16(r4)
110; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
111; CHECK-P9-NEXT:    xvcvuxwdp vs2, v2
112; CHECK-P9-NEXT:    xxmrghw v2, vs1, vs1
113; CHECK-P9-NEXT:    xvcvuxwdp vs1, v2
114; CHECK-P9-NEXT:    xxmrglw v2, vs0, vs0
115; CHECK-P9-NEXT:    xvcvuxwdp vs3, v2
116; CHECK-P9-NEXT:    xxmrghw v2, vs0, vs0
117; CHECK-P9-NEXT:    stxv vs2, 0(r3)
118; CHECK-P9-NEXT:    xvcvuxwdp vs0, v2
119; CHECK-P9-NEXT:    stxv vs1, 16(r3)
120; CHECK-P9-NEXT:    stxv vs3, 32(r3)
121; CHECK-P9-NEXT:    stxv vs0, 48(r3)
122; CHECK-P9-NEXT:    blr
123;
124; CHECK-BE-LABEL: test8elt:
125; CHECK-BE:       # %bb.0: # %entry
126; CHECK-BE-NEXT:    lxv vs1, 0(r4)
127; CHECK-BE-NEXT:    lxv vs0, 16(r4)
128; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
129; CHECK-BE-NEXT:    xvcvuxwdp vs2, v2
130; CHECK-BE-NEXT:    xxmrglw v2, vs1, vs1
131; CHECK-BE-NEXT:    xvcvuxwdp vs1, v2
132; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
133; CHECK-BE-NEXT:    xvcvuxwdp vs3, v2
134; CHECK-BE-NEXT:    xxmrglw v2, vs0, vs0
135; CHECK-BE-NEXT:    stxv vs2, 0(r3)
136; CHECK-BE-NEXT:    xvcvuxwdp vs0, v2
137; CHECK-BE-NEXT:    stxv vs1, 16(r3)
138; CHECK-BE-NEXT:    stxv vs3, 32(r3)
139; CHECK-BE-NEXT:    stxv vs0, 48(r3)
140; CHECK-BE-NEXT:    blr
141entry:
  ; %0 is the unnamed second parameter (the readonly source pointer).
142  %a = load <8 x i32>, <8 x i32>* %0, align 32
143  %1 = uitofp <8 x i32> %a to <8 x double>
144  store <8 x double> %1, <8 x double>* %agg.result, align 64
145  ret void
146}
147
; test16elt: loads a <16 x i32> vector from the unnamed pointer parameter,
; converts it to <16 x double> with an unsigned conversion (uitofp), and stores
; the result through the sret pointer %agg.result with 128-byte alignment.
; The expected assembly below converts the input in eight pieces, each with
; xvcvuxwdp. The assertion lines are autogenerated; regenerate them with the
; update script rather than editing by hand.
148define void @test16elt(<16 x double>* noalias nocapture sret(<16 x double>) %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 {
149; CHECK-P8-LABEL: test16elt:
150; CHECK-P8:       # %bb.0: # %entry
151; CHECK-P8-NEXT:    li r5, 16
152; CHECK-P8-NEXT:    li r6, 48
153; CHECK-P8-NEXT:    li r7, 32
154; CHECK-P8-NEXT:    li r8, 64
155; CHECK-P8-NEXT:    lvx v2, r4, r5
156; CHECK-P8-NEXT:    lvx v3, r4, r6
157; CHECK-P8-NEXT:    lvx v0, r4, r7
158; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
159; CHECK-P8-NEXT:    xxmrghw v5, v3, v3
160; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
161; CHECK-P8-NEXT:    xxmrglw v3, v3, v3
162; CHECK-P8-NEXT:    xvcvuxwdp vs0, v4
163; CHECK-P8-NEXT:    lvx v4, 0, r4
164; CHECK-P8-NEXT:    li r4, 112
165; CHECK-P8-NEXT:    xvcvuxwdp vs1, v5
166; CHECK-P8-NEXT:    xxmrghw v5, v0, v0
167; CHECK-P8-NEXT:    xxmrglw v0, v0, v0
168; CHECK-P8-NEXT:    xvcvuxwdp vs2, v2
169; CHECK-P8-NEXT:    xxmrglw v2, v4, v4
170; CHECK-P8-NEXT:    xvcvuxwdp vs3, v3
171; CHECK-P8-NEXT:    xxmrghw v3, v4, v4
172; CHECK-P8-NEXT:    xvcvuxwdp vs4, v5
173; CHECK-P8-NEXT:    xvcvuxwdp vs5, v0
174; CHECK-P8-NEXT:    xvcvuxwdp vs6, v2
175; CHECK-P8-NEXT:    xxswapd vs0, vs0
176; CHECK-P8-NEXT:    xvcvuxwdp vs7, v3
177; CHECK-P8-NEXT:    xxswapd vs1, vs1
178; CHECK-P8-NEXT:    xxswapd vs2, vs2
179; CHECK-P8-NEXT:    xxswapd vs3, vs3
180; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
181; CHECK-P8-NEXT:    li r4, 96
182; CHECK-P8-NEXT:    xxswapd vs4, vs4
183; CHECK-P8-NEXT:    xxswapd vs1, vs5
184; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
185; CHECK-P8-NEXT:    xxswapd vs5, vs6
186; CHECK-P8-NEXT:    li r4, 80
187; CHECK-P8-NEXT:    xxswapd vs3, vs7
188; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
189; CHECK-P8-NEXT:    stxvd2x vs1, r3, r8
190; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
191; CHECK-P8-NEXT:    stxvd2x vs0, r3, r7
192; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
193; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
194; CHECK-P8-NEXT:    blr
195;
196; CHECK-P9-LABEL: test16elt:
197; CHECK-P9:       # %bb.0: # %entry
198; CHECK-P9-NEXT:    lxv vs0, 0(r4)
199; CHECK-P9-NEXT:    lxv vs2, 16(r4)
200; CHECK-P9-NEXT:    lxv vs5, 32(r4)
201; CHECK-P9-NEXT:    lxv vs4, 48(r4)
202; CHECK-P9-NEXT:    xxmrglw v2, vs0, vs0
203; CHECK-P9-NEXT:    xvcvuxwdp vs1, v2
204; CHECK-P9-NEXT:    xxmrghw v2, vs0, vs0
205; CHECK-P9-NEXT:    xvcvuxwdp vs0, v2
206; CHECK-P9-NEXT:    xxmrglw v2, vs2, vs2
207; CHECK-P9-NEXT:    xvcvuxwdp vs3, v2
208; CHECK-P9-NEXT:    xxmrghw v2, vs2, vs2
209; CHECK-P9-NEXT:    stxv vs1, 0(r3)
210; CHECK-P9-NEXT:    stxv vs0, 16(r3)
211; CHECK-P9-NEXT:    xvcvuxwdp vs2, v2
212; CHECK-P9-NEXT:    xxmrglw v2, vs5, vs5
213; CHECK-P9-NEXT:    xvcvuxwdp vs6, v2
214; CHECK-P9-NEXT:    xxmrghw v2, vs5, vs5
215; CHECK-P9-NEXT:    stxv vs3, 32(r3)
216; CHECK-P9-NEXT:    stxv vs2, 48(r3)
217; CHECK-P9-NEXT:    xvcvuxwdp vs5, v2
218; CHECK-P9-NEXT:    xxmrglw v2, vs4, vs4
219; CHECK-P9-NEXT:    xvcvuxwdp vs7, v2
220; CHECK-P9-NEXT:    xxmrghw v2, vs4, vs4
221; CHECK-P9-NEXT:    stxv vs6, 64(r3)
222; CHECK-P9-NEXT:    stxv vs5, 80(r3)
223; CHECK-P9-NEXT:    xvcvuxwdp vs4, v2
224; CHECK-P9-NEXT:    stxv vs7, 96(r3)
225; CHECK-P9-NEXT:    stxv vs4, 112(r3)
226; CHECK-P9-NEXT:    blr
227;
228; CHECK-BE-LABEL: test16elt:
229; CHECK-BE:       # %bb.0: # %entry
230; CHECK-BE-NEXT:    lxv vs0, 0(r4)
231; CHECK-BE-NEXT:    lxv vs2, 16(r4)
232; CHECK-BE-NEXT:    lxv vs5, 32(r4)
233; CHECK-BE-NEXT:    lxv vs4, 48(r4)
234; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
235; CHECK-BE-NEXT:    xvcvuxwdp vs1, v2
236; CHECK-BE-NEXT:    xxmrglw v2, vs0, vs0
237; CHECK-BE-NEXT:    xvcvuxwdp vs0, v2
238; CHECK-BE-NEXT:    xxmrghw v2, vs2, vs2
239; CHECK-BE-NEXT:    xvcvuxwdp vs3, v2
240; CHECK-BE-NEXT:    xxmrglw v2, vs2, vs2
241; CHECK-BE-NEXT:    stxv vs1, 0(r3)
242; CHECK-BE-NEXT:    stxv vs0, 16(r3)
243; CHECK-BE-NEXT:    xvcvuxwdp vs2, v2
244; CHECK-BE-NEXT:    xxmrghw v2, vs5, vs5
245; CHECK-BE-NEXT:    xvcvuxwdp vs6, v2
246; CHECK-BE-NEXT:    xxmrglw v2, vs5, vs5
247; CHECK-BE-NEXT:    stxv vs3, 32(r3)
248; CHECK-BE-NEXT:    stxv vs2, 48(r3)
249; CHECK-BE-NEXT:    xvcvuxwdp vs5, v2
250; CHECK-BE-NEXT:    xxmrghw v2, vs4, vs4
251; CHECK-BE-NEXT:    xvcvuxwdp vs7, v2
252; CHECK-BE-NEXT:    xxmrglw v2, vs4, vs4
253; CHECK-BE-NEXT:    stxv vs6, 64(r3)
254; CHECK-BE-NEXT:    stxv vs5, 80(r3)
255; CHECK-BE-NEXT:    xvcvuxwdp vs4, v2
256; CHECK-BE-NEXT:    stxv vs7, 96(r3)
257; CHECK-BE-NEXT:    stxv vs4, 112(r3)
258; CHECK-BE-NEXT:    blr
259entry:
  ; %0 is the unnamed second parameter (the readonly source pointer).
260  %a = load <16 x i32>, <16 x i32>* %0, align 64
261  %1 = uitofp <16 x i32> %a to <16 x double>
262  store <16 x double> %1, <16 x double>* %agg.result, align 128
263  ret void
264}
265
; test2elt_signed: signed counterpart of the two-element case. Reinterprets the
; i64 argument as <2 x i32> and converts both lanes to double with sitofp,
; returning <2 x double>.
; The expected assembly below performs the conversion with xvcvsxwdp.
; The assertion lines are autogenerated; regenerate them with the update
; script rather than editing by hand.
266define <2 x double> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 {
267; CHECK-P8-LABEL: test2elt_signed:
268; CHECK-P8:       # %bb.0: # %entry
269; CHECK-P8-NEXT:    mtfprd f0, r3
270; CHECK-P8-NEXT:    xxswapd v2, vs0
271; CHECK-P8-NEXT:    xxmrglw v2, v2, v2
272; CHECK-P8-NEXT:    xvcvsxwdp v2, v2
273; CHECK-P8-NEXT:    blr
274;
275; CHECK-P9-LABEL: test2elt_signed:
276; CHECK-P9:       # %bb.0: # %entry
277; CHECK-P9-NEXT:    mtfprd f0, r3
278; CHECK-P9-NEXT:    xxswapd v2, vs0
279; CHECK-P9-NEXT:    xxmrglw v2, v2, v2
280; CHECK-P9-NEXT:    xvcvsxwdp v2, v2
281; CHECK-P9-NEXT:    blr
282;
283; CHECK-BE-LABEL: test2elt_signed:
284; CHECK-BE:       # %bb.0: # %entry
285; CHECK-BE-NEXT:    mtfprd f0, r3
286; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
287; CHECK-BE-NEXT:    xvcvsxwdp v2, v2
288; CHECK-BE-NEXT:    blr
289entry:
  ; The two i32 lanes arrive packed in a single 64-bit integer argument.
290  %0 = bitcast i64 %a.coerce to <2 x i32>
291  %1 = sitofp <2 x i32> %0 to <2 x double>
292  ret <2 x double> %1
293}
294
; test4elt_signed: signed conversion (sitofp) of the <4 x i32> argument %a to
; <4 x double>. The result is not returned by value; it is stored through the
; sret pointer %agg.result with 32-byte alignment.
; The expected assembly below splits the input with xxmrglw/xxmrghw and
; converts each half with xvcvsxwdp. The assertion lines are autogenerated;
; regenerate them with the update script rather than editing by hand.
295define void @test4elt_signed(<4 x double>* noalias nocapture sret(<4 x double>) %agg.result, <4 x i32> %a) local_unnamed_addr #1 {
296; CHECK-P8-LABEL: test4elt_signed:
297; CHECK-P8:       # %bb.0: # %entry
298; CHECK-P8-NEXT:    xxmrglw v3, v2, v2
299; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
300; CHECK-P8-NEXT:    li r4, 16
301; CHECK-P8-NEXT:    xvcvsxwdp vs0, v3
302; CHECK-P8-NEXT:    xvcvsxwdp vs1, v2
303; CHECK-P8-NEXT:    xxswapd vs0, vs0
304; CHECK-P8-NEXT:    xxswapd vs1, vs1
305; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
306; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
307; CHECK-P8-NEXT:    blr
308;
309; CHECK-P9-LABEL: test4elt_signed:
310; CHECK-P9:       # %bb.0: # %entry
311; CHECK-P9-NEXT:    xxmrglw v3, v2, v2
312; CHECK-P9-NEXT:    xxmrghw v2, v2, v2
313; CHECK-P9-NEXT:    xvcvsxwdp vs0, v3
314; CHECK-P9-NEXT:    xvcvsxwdp vs1, v2
315; CHECK-P9-NEXT:    stxv vs1, 16(r3)
316; CHECK-P9-NEXT:    stxv vs0, 0(r3)
317; CHECK-P9-NEXT:    blr
318;
319; CHECK-BE-LABEL: test4elt_signed:
320; CHECK-BE:       # %bb.0: # %entry
321; CHECK-BE-NEXT:    xxmrghw v3, v2, v2
322; CHECK-BE-NEXT:    xxmrglw v2, v2, v2
323; CHECK-BE-NEXT:    xvcvsxwdp vs0, v3
324; CHECK-BE-NEXT:    xvcvsxwdp vs1, v2
325; CHECK-BE-NEXT:    stxv vs1, 16(r3)
326; CHECK-BE-NEXT:    stxv vs0, 0(r3)
327; CHECK-BE-NEXT:    blr
328entry:
329  %0 = sitofp <4 x i32> %a to <4 x double>
330  store <4 x double> %0, <4 x double>* %agg.result, align 32
331  ret void
332}
333
; test8elt_signed: loads an <8 x i32> vector from the unnamed pointer
; parameter, converts it to <8 x double> with a signed conversion (sitofp), and
; stores the result through the sret pointer %agg.result with 64-byte alignment.
; The expected assembly below converts the input in four pieces, each with
; xvcvsxwdp. The assertion lines are autogenerated; regenerate them with the
; update script rather than editing by hand.
334define void @test8elt_signed(<8 x double>* noalias nocapture sret(<8 x double>) %agg.result, <8 x i32>* nocapture readonly) local_unnamed_addr #2 {
335; CHECK-P8-LABEL: test8elt_signed:
336; CHECK-P8:       # %bb.0: # %entry
337; CHECK-P8-NEXT:    li r5, 16
338; CHECK-P8-NEXT:    lvx v3, 0, r4
339; CHECK-P8-NEXT:    li r6, 32
340; CHECK-P8-NEXT:    lvx v2, r4, r5
341; CHECK-P8-NEXT:    li r4, 48
342; CHECK-P8-NEXT:    xxmrglw v5, v3, v3
343; CHECK-P8-NEXT:    xxmrghw v3, v3, v3
344; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
345; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
346; CHECK-P8-NEXT:    xvcvsxwdp vs2, v5
347; CHECK-P8-NEXT:    xvcvsxwdp vs0, v4
348; CHECK-P8-NEXT:    xvcvsxwdp vs1, v2
349; CHECK-P8-NEXT:    xvcvsxwdp vs3, v3
350; CHECK-P8-NEXT:    xxswapd vs2, vs2
351; CHECK-P8-NEXT:    xxswapd vs0, vs0
352; CHECK-P8-NEXT:    xxswapd vs1, vs1
353; CHECK-P8-NEXT:    xxswapd vs3, vs3
354; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
355; CHECK-P8-NEXT:    stxvd2x vs0, r3, r6
356; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
357; CHECK-P8-NEXT:    stxvd2x vs2, 0, r3
358; CHECK-P8-NEXT:    blr
359;
360; CHECK-P9-LABEL: test8elt_signed:
361; CHECK-P9:       # %bb.0: # %entry
362; CHECK-P9-NEXT:    lxv vs1, 0(r4)
363; CHECK-P9-NEXT:    lxv vs0, 16(r4)
364; CHECK-P9-NEXT:    xxmrglw v2, vs1, vs1
365; CHECK-P9-NEXT:    xvcvsxwdp vs2, v2
366; CHECK-P9-NEXT:    xxmrghw v2, vs1, vs1
367; CHECK-P9-NEXT:    xvcvsxwdp vs1, v2
368; CHECK-P9-NEXT:    xxmrglw v2, vs0, vs0
369; CHECK-P9-NEXT:    xvcvsxwdp vs3, v2
370; CHECK-P9-NEXT:    xxmrghw v2, vs0, vs0
371; CHECK-P9-NEXT:    stxv vs2, 0(r3)
372; CHECK-P9-NEXT:    xvcvsxwdp vs0, v2
373; CHECK-P9-NEXT:    stxv vs1, 16(r3)
374; CHECK-P9-NEXT:    stxv vs3, 32(r3)
375; CHECK-P9-NEXT:    stxv vs0, 48(r3)
376; CHECK-P9-NEXT:    blr
377;
378; CHECK-BE-LABEL: test8elt_signed:
379; CHECK-BE:       # %bb.0: # %entry
380; CHECK-BE-NEXT:    lxv vs1, 0(r4)
381; CHECK-BE-NEXT:    lxv vs0, 16(r4)
382; CHECK-BE-NEXT:    xxmrghw v2, vs1, vs1
383; CHECK-BE-NEXT:    xvcvsxwdp vs2, v2
384; CHECK-BE-NEXT:    xxmrglw v2, vs1, vs1
385; CHECK-BE-NEXT:    xvcvsxwdp vs1, v2
386; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
387; CHECK-BE-NEXT:    xvcvsxwdp vs3, v2
388; CHECK-BE-NEXT:    xxmrglw v2, vs0, vs0
389; CHECK-BE-NEXT:    stxv vs2, 0(r3)
390; CHECK-BE-NEXT:    xvcvsxwdp vs0, v2
391; CHECK-BE-NEXT:    stxv vs1, 16(r3)
392; CHECK-BE-NEXT:    stxv vs3, 32(r3)
393; CHECK-BE-NEXT:    stxv vs0, 48(r3)
394; CHECK-BE-NEXT:    blr
395entry:
  ; %0 is the unnamed second parameter (the readonly source pointer).
396  %a = load <8 x i32>, <8 x i32>* %0, align 32
397  %1 = sitofp <8 x i32> %a to <8 x double>
398  store <8 x double> %1, <8 x double>* %agg.result, align 64
399  ret void
400}
401
; test16elt_signed: loads a <16 x i32> vector from the unnamed pointer
; parameter, converts it to <16 x double> with a signed conversion (sitofp),
; and stores the result through the sret pointer %agg.result with 128-byte
; alignment.
; The expected assembly below converts the input in eight pieces, each with
; xvcvsxwdp. The assertion lines are autogenerated; regenerate them with the
; update script rather than editing by hand.
402define void @test16elt_signed(<16 x double>* noalias nocapture sret(<16 x double>) %agg.result, <16 x i32>* nocapture readonly) local_unnamed_addr #2 {
403; CHECK-P8-LABEL: test16elt_signed:
404; CHECK-P8:       # %bb.0: # %entry
405; CHECK-P8-NEXT:    li r5, 16
406; CHECK-P8-NEXT:    li r6, 48
407; CHECK-P8-NEXT:    li r7, 32
408; CHECK-P8-NEXT:    li r8, 64
409; CHECK-P8-NEXT:    lvx v2, r4, r5
410; CHECK-P8-NEXT:    lvx v3, r4, r6
411; CHECK-P8-NEXT:    lvx v0, r4, r7
412; CHECK-P8-NEXT:    xxmrglw v4, v2, v2
413; CHECK-P8-NEXT:    xxmrghw v5, v3, v3
414; CHECK-P8-NEXT:    xxmrghw v2, v2, v2
415; CHECK-P8-NEXT:    xxmrglw v3, v3, v3
416; CHECK-P8-NEXT:    xvcvsxwdp vs0, v4
417; CHECK-P8-NEXT:    lvx v4, 0, r4
418; CHECK-P8-NEXT:    li r4, 112
419; CHECK-P8-NEXT:    xvcvsxwdp vs1, v5
420; CHECK-P8-NEXT:    xxmrghw v5, v0, v0
421; CHECK-P8-NEXT:    xxmrglw v0, v0, v0
422; CHECK-P8-NEXT:    xvcvsxwdp vs2, v2
423; CHECK-P8-NEXT:    xxmrglw v2, v4, v4
424; CHECK-P8-NEXT:    xvcvsxwdp vs3, v3
425; CHECK-P8-NEXT:    xxmrghw v3, v4, v4
426; CHECK-P8-NEXT:    xvcvsxwdp vs4, v5
427; CHECK-P8-NEXT:    xvcvsxwdp vs5, v0
428; CHECK-P8-NEXT:    xvcvsxwdp vs6, v2
429; CHECK-P8-NEXT:    xxswapd vs0, vs0
430; CHECK-P8-NEXT:    xvcvsxwdp vs7, v3
431; CHECK-P8-NEXT:    xxswapd vs1, vs1
432; CHECK-P8-NEXT:    xxswapd vs2, vs2
433; CHECK-P8-NEXT:    xxswapd vs3, vs3
434; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
435; CHECK-P8-NEXT:    li r4, 96
436; CHECK-P8-NEXT:    xxswapd vs4, vs4
437; CHECK-P8-NEXT:    xxswapd vs1, vs5
438; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
439; CHECK-P8-NEXT:    xxswapd vs5, vs6
440; CHECK-P8-NEXT:    li r4, 80
441; CHECK-P8-NEXT:    xxswapd vs3, vs7
442; CHECK-P8-NEXT:    stxvd2x vs4, r3, r4
443; CHECK-P8-NEXT:    stxvd2x vs1, r3, r8
444; CHECK-P8-NEXT:    stxvd2x vs2, r3, r6
445; CHECK-P8-NEXT:    stxvd2x vs0, r3, r7
446; CHECK-P8-NEXT:    stxvd2x vs3, r3, r5
447; CHECK-P8-NEXT:    stxvd2x vs5, 0, r3
448; CHECK-P8-NEXT:    blr
449;
450; CHECK-P9-LABEL: test16elt_signed:
451; CHECK-P9:       # %bb.0: # %entry
452; CHECK-P9-NEXT:    lxv vs0, 0(r4)
453; CHECK-P9-NEXT:    lxv vs2, 16(r4)
454; CHECK-P9-NEXT:    lxv vs5, 32(r4)
455; CHECK-P9-NEXT:    lxv vs4, 48(r4)
456; CHECK-P9-NEXT:    xxmrglw v2, vs0, vs0
457; CHECK-P9-NEXT:    xvcvsxwdp vs1, v2
458; CHECK-P9-NEXT:    xxmrghw v2, vs0, vs0
459; CHECK-P9-NEXT:    xvcvsxwdp vs0, v2
460; CHECK-P9-NEXT:    xxmrglw v2, vs2, vs2
461; CHECK-P9-NEXT:    xvcvsxwdp vs3, v2
462; CHECK-P9-NEXT:    xxmrghw v2, vs2, vs2
463; CHECK-P9-NEXT:    stxv vs1, 0(r3)
464; CHECK-P9-NEXT:    stxv vs0, 16(r3)
465; CHECK-P9-NEXT:    xvcvsxwdp vs2, v2
466; CHECK-P9-NEXT:    xxmrglw v2, vs5, vs5
467; CHECK-P9-NEXT:    xvcvsxwdp vs6, v2
468; CHECK-P9-NEXT:    xxmrghw v2, vs5, vs5
469; CHECK-P9-NEXT:    stxv vs3, 32(r3)
470; CHECK-P9-NEXT:    stxv vs2, 48(r3)
471; CHECK-P9-NEXT:    xvcvsxwdp vs5, v2
472; CHECK-P9-NEXT:    xxmrglw v2, vs4, vs4
473; CHECK-P9-NEXT:    xvcvsxwdp vs7, v2
474; CHECK-P9-NEXT:    xxmrghw v2, vs4, vs4
475; CHECK-P9-NEXT:    stxv vs6, 64(r3)
476; CHECK-P9-NEXT:    stxv vs5, 80(r3)
477; CHECK-P9-NEXT:    xvcvsxwdp vs4, v2
478; CHECK-P9-NEXT:    stxv vs7, 96(r3)
479; CHECK-P9-NEXT:    stxv vs4, 112(r3)
480; CHECK-P9-NEXT:    blr
481;
482; CHECK-BE-LABEL: test16elt_signed:
483; CHECK-BE:       # %bb.0: # %entry
484; CHECK-BE-NEXT:    lxv vs0, 0(r4)
485; CHECK-BE-NEXT:    lxv vs2, 16(r4)
486; CHECK-BE-NEXT:    lxv vs5, 32(r4)
487; CHECK-BE-NEXT:    lxv vs4, 48(r4)
488; CHECK-BE-NEXT:    xxmrghw v2, vs0, vs0
489; CHECK-BE-NEXT:    xvcvsxwdp vs1, v2
490; CHECK-BE-NEXT:    xxmrglw v2, vs0, vs0
491; CHECK-BE-NEXT:    xvcvsxwdp vs0, v2
492; CHECK-BE-NEXT:    xxmrghw v2, vs2, vs2
493; CHECK-BE-NEXT:    xvcvsxwdp vs3, v2
494; CHECK-BE-NEXT:    xxmrglw v2, vs2, vs2
495; CHECK-BE-NEXT:    stxv vs1, 0(r3)
496; CHECK-BE-NEXT:    stxv vs0, 16(r3)
497; CHECK-BE-NEXT:    xvcvsxwdp vs2, v2
498; CHECK-BE-NEXT:    xxmrghw v2, vs5, vs5
499; CHECK-BE-NEXT:    xvcvsxwdp vs6, v2
500; CHECK-BE-NEXT:    xxmrglw v2, vs5, vs5
501; CHECK-BE-NEXT:    stxv vs3, 32(r3)
502; CHECK-BE-NEXT:    stxv vs2, 48(r3)
503; CHECK-BE-NEXT:    xvcvsxwdp vs5, v2
504; CHECK-BE-NEXT:    xxmrghw v2, vs4, vs4
505; CHECK-BE-NEXT:    xvcvsxwdp vs7, v2
506; CHECK-BE-NEXT:    xxmrglw v2, vs4, vs4
507; CHECK-BE-NEXT:    stxv vs6, 64(r3)
508; CHECK-BE-NEXT:    stxv vs5, 80(r3)
509; CHECK-BE-NEXT:    xvcvsxwdp vs4, v2
510; CHECK-BE-NEXT:    stxv vs7, 96(r3)
511; CHECK-BE-NEXT:    stxv vs4, 112(r3)
512; CHECK-BE-NEXT:    blr
513entry:
  ; %0 is the unnamed second parameter (the readonly source pointer).
514  %a = load <16 x i32>, <16 x i32>* %0, align 64
515  %1 = sitofp <16 x i32> %a to <16 x double>
516  store <16 x double> %1, <16 x double>* %agg.result, align 128
517  ret void
518}
519