1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
4; RUN: FileCheck %s --check-prefix=CHECK-P8
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
6; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
7; RUN: FileCheck %s --check-prefix=CHECK-P9
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
9; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
10; RUN: FileCheck %s --check-prefix=CHECK-BE
11
; test2elt: unsigned conversion of two i16 lanes to two floats.
; The two i16 elements arrive packed in a single i32 argument and the two
; resulting floats are returned packed in a single i64, so the vector values
; only exist between the two bitcasts below.
; The assertions that follow were autogenerated (see the note at the top of
; the file) for the three RUN configurations: pwr8 little-endian, pwr9
; little-endian and pwr9 big-endian. Regenerate them rather than editing
; them by hand.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left undefined.
12define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 {
13; CHECK-P8-LABEL: test2elt:
14; CHECK-P8:       # %bb.0: # %entry
15; CHECK-P8-NEXT:    mtfprd f0, r3
16; CHECK-P8-NEXT:    mffprd r3, f0
17; CHECK-P8-NEXT:    clrldi r4, r3, 48
18; CHECK-P8-NEXT:    rldicl r3, r3, 48, 48
19; CHECK-P8-NEXT:    clrlwi r4, r4, 16
20; CHECK-P8-NEXT:    clrlwi r3, r3, 16
21; CHECK-P8-NEXT:    mtfprwz f0, r4
22; CHECK-P8-NEXT:    mtfprwz f1, r3
23; CHECK-P8-NEXT:    xscvuxdsp f0, f0
24; CHECK-P8-NEXT:    xscvuxdsp f1, f1
25; CHECK-P8-NEXT:    xscvdpspn v2, f0
26; CHECK-P8-NEXT:    xscvdpspn v3, f1
27; CHECK-P8-NEXT:    vmrghw v2, v3, v2
28; CHECK-P8-NEXT:    xxswapd vs0, v2
29; CHECK-P8-NEXT:    mffprd r3, f0
30; CHECK-P8-NEXT:    blr
31;
32; CHECK-P9-LABEL: test2elt:
33; CHECK-P9:       # %bb.0: # %entry
34; CHECK-P9-NEXT:    mtvsrws v2, r3
35; CHECK-P9-NEXT:    vextractuh v3, v2, 14
36; CHECK-P9-NEXT:    vextractuh v2, v2, 12
37; CHECK-P9-NEXT:    xscvuxdsp f0, v3
38; CHECK-P9-NEXT:    xscvdpspn v3, f0
39; CHECK-P9-NEXT:    xscvuxdsp f0, v2
40; CHECK-P9-NEXT:    xscvdpspn v2, f0
41; CHECK-P9-NEXT:    vmrghw v2, v2, v3
42; CHECK-P9-NEXT:    mfvsrld r3, v2
43; CHECK-P9-NEXT:    blr
44;
45; CHECK-BE-LABEL: test2elt:
46; CHECK-BE:       # %bb.0: # %entry
47; CHECK-BE-NEXT:    mtvsrws v2, r3
48; CHECK-BE-NEXT:    vextractuh v3, v2, 2
49; CHECK-BE-NEXT:    vextractuh v2, v2, 0
50; CHECK-BE-NEXT:    xscvuxdsp f0, v3
51; CHECK-BE-NEXT:    xscvdpspn v3, f0
52; CHECK-BE-NEXT:    xscvuxdsp f0, v2
53; CHECK-BE-NEXT:    xscvdpspn v2, f0
54; CHECK-BE-NEXT:    vmrgow v2, v2, v3
55; CHECK-BE-NEXT:    mfvsrd r3, v2
56; CHECK-BE-NEXT:    blr
57entry:
; Reinterpret the 32-bit scalar argument as a <2 x i16> vector.
58  %0 = bitcast i32 %a.coerce to <2 x i16>
; The operation under test: unsigned i16 -> float, lane by lane.
59  %1 = uitofp <2 x i16> %0 to <2 x float>
; Repack the two floats into the 64-bit scalar return value.
60  %2 = bitcast <2 x float> %1 to i64
61  ret i64 %2
62}
63
; test4elt: unsigned conversion of four i16 lanes to four floats.
; The four i16 elements arrive packed in a single i64 argument; the result is
; returned directly as a <4 x float> vector.
; The assertions that follow were autogenerated (see the note at the top of
; the file) for the three RUN configurations: pwr8 little-endian, pwr9
; little-endian and pwr9 big-endian. Regenerate them rather than editing
; them by hand.
; NOTE(review): attribute group #1 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left undefined.
64define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 {
65; CHECK-P8-LABEL: test4elt:
66; CHECK-P8:       # %bb.0: # %entry
67; CHECK-P8-NEXT:    xxlxor v2, v2, v2
68; CHECK-P8-NEXT:    mtvsrd v3, r3
69; CHECK-P8-NEXT:    vmrghh v2, v2, v3
70; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
71; CHECK-P8-NEXT:    blr
72;
73; CHECK-P9-LABEL: test4elt:
74; CHECK-P9:       # %bb.0: # %entry
75; CHECK-P9-NEXT:    mtvsrd v2, r3
76; CHECK-P9-NEXT:    xxlxor v3, v3, v3
77; CHECK-P9-NEXT:    vmrghh v2, v3, v2
78; CHECK-P9-NEXT:    xvcvuxwsp v2, v2
79; CHECK-P9-NEXT:    blr
80;
81; CHECK-BE-LABEL: test4elt:
82; CHECK-BE:       # %bb.0: # %entry
83; CHECK-BE-NEXT:    mtvsrd v2, r3
84; CHECK-BE-NEXT:    addis r3, r2, .LCPI1_0@toc@ha
85; CHECK-BE-NEXT:    xxlxor v4, v4, v4
86; CHECK-BE-NEXT:    addi r3, r3, .LCPI1_0@toc@l
87; CHECK-BE-NEXT:    lxv v3, 0(r3)
88; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
89; CHECK-BE-NEXT:    xvcvuxwsp v2, v2
90; CHECK-BE-NEXT:    blr
91entry:
; Reinterpret the 64-bit scalar argument as a <4 x i16> vector.
92  %0 = bitcast i64 %a.coerce to <4 x i16>
; The operation under test: unsigned i16 -> float, lane by lane.
93  %1 = uitofp <4 x i16> %0 to <4 x float>
94  ret <4 x float> %1
95}
96
; test8elt: unsigned conversion of eight i16 lanes to eight floats.
; The input is passed as a <8 x i16> vector argument; the <8 x float> result
; is written through the sret pointer %agg.result instead of being returned.
; The assertions that follow were autogenerated (see the note at the top of
; the file) for the three RUN configurations: pwr8 little-endian, pwr9
; little-endian and pwr9 big-endian. Regenerate them rather than editing
; them by hand.
; NOTE(review): attribute group #2 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left undefined.
97define void @test8elt(<8 x float>* noalias nocapture sret(<8 x float>) %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
98; CHECK-P8-LABEL: test8elt:
99; CHECK-P8:       # %bb.0: # %entry
100; CHECK-P8-NEXT:    xxlxor v3, v3, v3
101; CHECK-P8-NEXT:    li r4, 16
102; CHECK-P8-NEXT:    vmrglh v4, v3, v2
103; CHECK-P8-NEXT:    vmrghh v2, v3, v2
104; CHECK-P8-NEXT:    xvcvuxwsp v3, v4
105; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
106; CHECK-P8-NEXT:    stvx v3, 0, r3
107; CHECK-P8-NEXT:    stvx v2, r3, r4
108; CHECK-P8-NEXT:    blr
109;
110; CHECK-P9-LABEL: test8elt:
111; CHECK-P9:       # %bb.0: # %entry
112; CHECK-P9-NEXT:    xxlxor v3, v3, v3
113; CHECK-P9-NEXT:    vmrglh v4, v3, v2
114; CHECK-P9-NEXT:    vmrghh v2, v3, v2
115; CHECK-P9-NEXT:    xvcvuxwsp vs0, v4
116; CHECK-P9-NEXT:    xvcvuxwsp vs1, v2
117; CHECK-P9-NEXT:    stxv vs1, 16(r3)
118; CHECK-P9-NEXT:    stxv vs0, 0(r3)
119; CHECK-P9-NEXT:    blr
120;
121; CHECK-BE-LABEL: test8elt:
122; CHECK-BE:       # %bb.0: # %entry
123; CHECK-BE-NEXT:    xxlxor v3, v3, v3
124; CHECK-BE-NEXT:    vmrghh v4, v3, v2
125; CHECK-BE-NEXT:    vmrglh v2, v3, v2
126; CHECK-BE-NEXT:    xvcvuxwsp vs0, v4
127; CHECK-BE-NEXT:    xvcvuxwsp vs1, v2
128; CHECK-BE-NEXT:    stxv vs1, 16(r3)
129; CHECK-BE-NEXT:    stxv vs0, 0(r3)
130; CHECK-BE-NEXT:    blr
131entry:
; The operation under test: unsigned i16 -> float, lane by lane.
132  %0 = uitofp <8 x i16> %a to <8 x float>
; Store the 32-byte result into the caller-provided sret slot.
133  store <8 x float> %0, <8 x float>* %agg.result, align 32
134  ret void
135}
136
; test16elt: unsigned conversion of sixteen i16 lanes to sixteen floats.
; The <16 x i16> input is loaded from the second (unnamed) pointer parameter
; and the <16 x float> result is written through the sret pointer
; %agg.result.
; The assertions that follow were autogenerated (see the note at the top of
; the file) for the three RUN configurations: pwr8 little-endian, pwr9
; little-endian and pwr9 big-endian. Regenerate them rather than editing
; them by hand.
; NOTE(review): attribute group #3 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left undefined.
137define void @test16elt(<16 x float>* noalias nocapture sret(<16 x float>) %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
138; CHECK-P8-LABEL: test16elt:
139; CHECK-P8:       # %bb.0: # %entry
140; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
141; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_1@toc@ha
142; CHECK-P8-NEXT:    xxlxor v3, v3, v3
143; CHECK-P8-NEXT:    lvx v4, 0, r4
144; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0@toc@l
145; CHECK-P8-NEXT:    addi r6, r6, .LCPI3_1@toc@l
146; CHECK-P8-NEXT:    lvx v2, 0, r5
147; CHECK-P8-NEXT:    li r5, 16
148; CHECK-P8-NEXT:    lvx v0, 0, r6
149; CHECK-P8-NEXT:    li r6, 32
150; CHECK-P8-NEXT:    lvx v5, r4, r5
151; CHECK-P8-NEXT:    li r4, 48
152; CHECK-P8-NEXT:    vperm v1, v3, v4, v2
153; CHECK-P8-NEXT:    vperm v2, v3, v5, v2
154; CHECK-P8-NEXT:    vperm v5, v3, v5, v0
155; CHECK-P8-NEXT:    vperm v3, v3, v4, v0
156; CHECK-P8-NEXT:    xvcvuxwsp v4, v1
157; CHECK-P8-NEXT:    xvcvuxwsp v2, v2
158; CHECK-P8-NEXT:    xvcvuxwsp v5, v5
159; CHECK-P8-NEXT:    xvcvuxwsp v3, v3
160; CHECK-P8-NEXT:    stvx v4, 0, r3
161; CHECK-P8-NEXT:    stvx v2, r3, r6
162; CHECK-P8-NEXT:    stvx v5, r3, r4
163; CHECK-P8-NEXT:    stvx v3, r3, r5
164; CHECK-P8-NEXT:    blr
165;
166; CHECK-P9-LABEL: test16elt:
167; CHECK-P9:       # %bb.0: # %entry
168; CHECK-P9-NEXT:    lxv v2, 16(r4)
169; CHECK-P9-NEXT:    lxv v3, 0(r4)
170; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
171; CHECK-P9-NEXT:    xxlxor v5, v5, v5
172; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0@toc@l
173; CHECK-P9-NEXT:    lxv v4, 0(r4)
174; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
175; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1@toc@l
176; CHECK-P9-NEXT:    vperm v0, v5, v3, v4
177; CHECK-P9-NEXT:    xvcvuxwsp vs0, v0
178; CHECK-P9-NEXT:    lxv v0, 0(r4)
179; CHECK-P9-NEXT:    vperm v3, v5, v3, v0
180; CHECK-P9-NEXT:    stxv vs0, 0(r3)
181; CHECK-P9-NEXT:    xvcvuxwsp vs1, v3
182; CHECK-P9-NEXT:    vperm v3, v5, v2, v4
183; CHECK-P9-NEXT:    vperm v2, v5, v2, v0
184; CHECK-P9-NEXT:    xvcvuxwsp vs2, v3
185; CHECK-P9-NEXT:    xvcvuxwsp vs3, v2
186; CHECK-P9-NEXT:    stxv vs1, 16(r3)
187; CHECK-P9-NEXT:    stxv vs3, 48(r3)
188; CHECK-P9-NEXT:    stxv vs2, 32(r3)
189; CHECK-P9-NEXT:    blr
190;
191; CHECK-BE-LABEL: test16elt:
192; CHECK-BE:       # %bb.0: # %entry
193; CHECK-BE-NEXT:    lxv v2, 16(r4)
194; CHECK-BE-NEXT:    lxv v3, 0(r4)
195; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
196; CHECK-BE-NEXT:    xxlxor v5, v5, v5
197; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
198; CHECK-BE-NEXT:    lxv v4, 0(r4)
199; CHECK-BE-NEXT:    vperm v0, v5, v3, v4
200; CHECK-BE-NEXT:    vperm v4, v5, v2, v4
201; CHECK-BE-NEXT:    vmrglh v3, v5, v3
202; CHECK-BE-NEXT:    vmrglh v2, v5, v2
203; CHECK-BE-NEXT:    xvcvuxwsp vs0, v0
204; CHECK-BE-NEXT:    xvcvuxwsp vs1, v4
205; CHECK-BE-NEXT:    stxv vs1, 32(r3)
206; CHECK-BE-NEXT:    xvcvuxwsp vs2, v3
207; CHECK-BE-NEXT:    xvcvuxwsp vs3, v2
208; CHECK-BE-NEXT:    stxv vs3, 48(r3)
209; CHECK-BE-NEXT:    stxv vs2, 16(r3)
210; CHECK-BE-NEXT:    stxv vs0, 0(r3)
211; CHECK-BE-NEXT:    blr
212entry:
; %0 is the unnamed second parameter (the source vector pointer).
213  %a = load <16 x i16>, <16 x i16>* %0, align 32
; The operation under test: unsigned i16 -> float, lane by lane.
214  %1 = uitofp <16 x i16> %a to <16 x float>
; Store the 64-byte result into the caller-provided sret slot.
215  store <16 x float> %1, <16 x float>* %agg.result, align 64
216  ret void
217}
218
; test2elt_signed: signed conversion of two i16 lanes to two floats.
; Same shape as the unsigned two-element test (i32 in, i64 out, vectors only
; between the bitcasts) but uses sitofp, so the expected code sign-extends
; the halfwords before converting.
; The assertions that follow were autogenerated (see the note at the top of
; the file) for the three RUN configurations: pwr8 little-endian, pwr9
; little-endian and pwr9 big-endian. Regenerate them rather than editing
; them by hand.
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left undefined.
219define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 {
220; CHECK-P8-LABEL: test2elt_signed:
221; CHECK-P8:       # %bb.0: # %entry
222; CHECK-P8-NEXT:    mtfprd f0, r3
223; CHECK-P8-NEXT:    mffprd r3, f0
224; CHECK-P8-NEXT:    clrldi r4, r3, 48
225; CHECK-P8-NEXT:    rldicl r3, r3, 48, 48
226; CHECK-P8-NEXT:    extsh r4, r4
227; CHECK-P8-NEXT:    extsh r3, r3
228; CHECK-P8-NEXT:    mtfprwa f0, r4
229; CHECK-P8-NEXT:    mtfprwa f1, r3
230; CHECK-P8-NEXT:    xscvsxdsp f0, f0
231; CHECK-P8-NEXT:    xscvsxdsp f1, f1
232; CHECK-P8-NEXT:    xscvdpspn v2, f0
233; CHECK-P8-NEXT:    xscvdpspn v3, f1
234; CHECK-P8-NEXT:    vmrghw v2, v3, v2
235; CHECK-P8-NEXT:    xxswapd vs0, v2
236; CHECK-P8-NEXT:    mffprd r3, f0
237; CHECK-P8-NEXT:    blr
238;
239; CHECK-P9-LABEL: test2elt_signed:
240; CHECK-P9:       # %bb.0: # %entry
241; CHECK-P9-NEXT:    mtvsrws v2, r3
242; CHECK-P9-NEXT:    vextractuh v3, v2, 14
243; CHECK-P9-NEXT:    vextractuh v2, v2, 12
244; CHECK-P9-NEXT:    vextsh2d v3, v3
245; CHECK-P9-NEXT:    vextsh2d v2, v2
246; CHECK-P9-NEXT:    xscvsxdsp f0, v3
247; CHECK-P9-NEXT:    xscvdpspn v3, f0
248; CHECK-P9-NEXT:    xscvsxdsp f0, v2
249; CHECK-P9-NEXT:    xscvdpspn v2, f0
250; CHECK-P9-NEXT:    vmrghw v2, v2, v3
251; CHECK-P9-NEXT:    mfvsrld r3, v2
252; CHECK-P9-NEXT:    blr
253;
254; CHECK-BE-LABEL: test2elt_signed:
255; CHECK-BE:       # %bb.0: # %entry
256; CHECK-BE-NEXT:    mtvsrws v2, r3
257; CHECK-BE-NEXT:    vextractuh v3, v2, 2
258; CHECK-BE-NEXT:    vextractuh v2, v2, 0
259; CHECK-BE-NEXT:    vextsh2d v3, v3
260; CHECK-BE-NEXT:    vextsh2d v2, v2
261; CHECK-BE-NEXT:    xscvsxdsp f0, v3
262; CHECK-BE-NEXT:    xscvdpspn v3, f0
263; CHECK-BE-NEXT:    xscvsxdsp f0, v2
264; CHECK-BE-NEXT:    xscvdpspn v2, f0
265; CHECK-BE-NEXT:    vmrgow v2, v2, v3
266; CHECK-BE-NEXT:    mfvsrd r3, v2
267; CHECK-BE-NEXT:    blr
268entry:
; Reinterpret the 32-bit scalar argument as a <2 x i16> vector.
269  %0 = bitcast i32 %a.coerce to <2 x i16>
; The operation under test: signed i16 -> float, lane by lane.
270  %1 = sitofp <2 x i16> %0 to <2 x float>
; Repack the two floats into the 64-bit scalar return value.
271  %2 = bitcast <2 x float> %1 to i64
272  ret i64 %2
273}
274
; test4elt_signed: signed conversion of four i16 lanes to four floats.
; The four i16 elements arrive packed in a single i64 argument; the result is
; returned directly as a <4 x float> vector. The pwr8 expectation widens the
; halfwords with a shift-left/arithmetic-shift-right pair, while both pwr9
; expectations use vextsh2w.
; The assertions that follow were autogenerated (see the note at the top of
; the file) for the three RUN configurations: pwr8 little-endian, pwr9
; little-endian and pwr9 big-endian. Regenerate them rather than editing
; them by hand.
; NOTE(review): attribute group #1 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left undefined.
275define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 {
276; CHECK-P8-LABEL: test4elt_signed:
277; CHECK-P8:       # %bb.0: # %entry
278; CHECK-P8-NEXT:    mtvsrd v2, r3
279; CHECK-P8-NEXT:    vspltisw v3, 8
280; CHECK-P8-NEXT:    vmrghh v2, v2, v2
281; CHECK-P8-NEXT:    vadduwm v3, v3, v3
282; CHECK-P8-NEXT:    vslw v2, v2, v3
283; CHECK-P8-NEXT:    vsraw v2, v2, v3
284; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
285; CHECK-P8-NEXT:    blr
286;
287; CHECK-P9-LABEL: test4elt_signed:
288; CHECK-P9:       # %bb.0: # %entry
289; CHECK-P9-NEXT:    mtvsrd v2, r3
290; CHECK-P9-NEXT:    vmrghh v2, v2, v2
291; CHECK-P9-NEXT:    vextsh2w v2, v2
292; CHECK-P9-NEXT:    xvcvsxwsp v2, v2
293; CHECK-P9-NEXT:    blr
294;
295; CHECK-BE-LABEL: test4elt_signed:
296; CHECK-BE:       # %bb.0: # %entry
297; CHECK-BE-NEXT:    mtvsrd v2, r3
298; CHECK-BE-NEXT:    vmrghh v2, v2, v2
299; CHECK-BE-NEXT:    vextsh2w v2, v2
300; CHECK-BE-NEXT:    xvcvsxwsp v2, v2
301; CHECK-BE-NEXT:    blr
302entry:
; Reinterpret the 64-bit scalar argument as a <4 x i16> vector.
303  %0 = bitcast i64 %a.coerce to <4 x i16>
; The operation under test: signed i16 -> float, lane by lane.
304  %1 = sitofp <4 x i16> %0 to <4 x float>
305  ret <4 x float> %1
306}
307
; test8elt_signed: signed conversion of eight i16 lanes to eight floats.
; The input is passed as a <8 x i16> vector argument; the <8 x float> result
; is written through the sret pointer %agg.result instead of being returned.
; The assertions that follow were autogenerated (see the note at the top of
; the file) for the three RUN configurations: pwr8 little-endian, pwr9
; little-endian and pwr9 big-endian. Regenerate them rather than editing
; them by hand.
; NOTE(review): the big-endian expectation merges the low half with a zeroed
; register (xxlxor + vmrglh) while the high half merges the input with
; itself; this is recorded compiler output - confirm it is still current when
; regenerating.
; NOTE(review): attribute group #2 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left undefined.
308define void @test8elt_signed(<8 x float>* noalias nocapture sret(<8 x float>) %agg.result, <8 x i16> %a) local_unnamed_addr #2 {
309; CHECK-P8-LABEL: test8elt_signed:
310; CHECK-P8:       # %bb.0: # %entry
311; CHECK-P8-NEXT:    vmrglh v4, v2, v2
312; CHECK-P8-NEXT:    vspltisw v3, 8
313; CHECK-P8-NEXT:    li r4, 16
314; CHECK-P8-NEXT:    vmrghh v2, v2, v2
315; CHECK-P8-NEXT:    vadduwm v3, v3, v3
316; CHECK-P8-NEXT:    vslw v4, v4, v3
317; CHECK-P8-NEXT:    vslw v2, v2, v3
318; CHECK-P8-NEXT:    vsraw v4, v4, v3
319; CHECK-P8-NEXT:    vsraw v2, v2, v3
320; CHECK-P8-NEXT:    xvcvsxwsp v3, v4
321; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
322; CHECK-P8-NEXT:    stvx v3, 0, r3
323; CHECK-P8-NEXT:    stvx v2, r3, r4
324; CHECK-P8-NEXT:    blr
325;
326; CHECK-P9-LABEL: test8elt_signed:
327; CHECK-P9:       # %bb.0: # %entry
328; CHECK-P9-NEXT:    vmrglh v3, v2, v2
329; CHECK-P9-NEXT:    vmrghh v2, v2, v2
330; CHECK-P9-NEXT:    vextsh2w v3, v3
331; CHECK-P9-NEXT:    vextsh2w v2, v2
332; CHECK-P9-NEXT:    xvcvsxwsp vs0, v3
333; CHECK-P9-NEXT:    xvcvsxwsp vs1, v2
334; CHECK-P9-NEXT:    stxv vs1, 16(r3)
335; CHECK-P9-NEXT:    stxv vs0, 0(r3)
336; CHECK-P9-NEXT:    blr
337;
338; CHECK-BE-LABEL: test8elt_signed:
339; CHECK-BE:       # %bb.0: # %entry
340; CHECK-BE-NEXT:    xxlxor v3, v3, v3
341; CHECK-BE-NEXT:    vmrglh v3, v3, v2
342; CHECK-BE-NEXT:    vmrghh v2, v2, v2
343; CHECK-BE-NEXT:    vextsh2w v3, v3
344; CHECK-BE-NEXT:    vextsh2w v2, v2
345; CHECK-BE-NEXT:    xvcvsxwsp vs0, v3
346; CHECK-BE-NEXT:    xvcvsxwsp vs1, v2
347; CHECK-BE-NEXT:    stxv vs0, 16(r3)
348; CHECK-BE-NEXT:    stxv vs1, 0(r3)
349; CHECK-BE-NEXT:    blr
350entry:
; The operation under test: signed i16 -> float, lane by lane.
351  %0 = sitofp <8 x i16> %a to <8 x float>
; Store the 32-byte result into the caller-provided sret slot.
352  store <8 x float> %0, <8 x float>* %agg.result, align 32
353  ret void
354}
355
; test16elt_signed: signed conversion of sixteen i16 lanes to sixteen floats.
; The <16 x i16> input is loaded from the second (unnamed) pointer parameter
; and the <16 x float> result is written through the sret pointer
; %agg.result.
; The assertions that follow were autogenerated (see the note at the top of
; the file) for the three RUN configurations: pwr8 little-endian, pwr9
; little-endian and pwr9 big-endian. Regenerate them rather than editing
; them by hand.
; NOTE(review): attribute group #3 is not defined in the visible part of this
; file - confirm it is declared elsewhere or intentionally left undefined.
356define void @test16elt_signed(<16 x float>* noalias nocapture sret(<16 x float>) %agg.result, <16 x i16>* nocapture readonly) local_unnamed_addr #3 {
357; CHECK-P8-LABEL: test16elt_signed:
358; CHECK-P8:       # %bb.0: # %entry
359; CHECK-P8-NEXT:    li r5, 16
360; CHECK-P8-NEXT:    lvx v2, 0, r4
361; CHECK-P8-NEXT:    vspltisw v5, 8
362; CHECK-P8-NEXT:    li r6, 32
363; CHECK-P8-NEXT:    lvx v3, r4, r5
364; CHECK-P8-NEXT:    li r4, 48
365; CHECK-P8-NEXT:    vmrglh v4, v2, v2
366; CHECK-P8-NEXT:    vmrglh v0, v3, v3
367; CHECK-P8-NEXT:    vmrghh v3, v3, v3
368; CHECK-P8-NEXT:    vmrghh v2, v2, v2
369; CHECK-P8-NEXT:    vadduwm v5, v5, v5
370; CHECK-P8-NEXT:    vslw v4, v4, v5
371; CHECK-P8-NEXT:    vslw v0, v0, v5
372; CHECK-P8-NEXT:    vslw v3, v3, v5
373; CHECK-P8-NEXT:    vslw v2, v2, v5
374; CHECK-P8-NEXT:    vsraw v4, v4, v5
375; CHECK-P8-NEXT:    vsraw v0, v0, v5
376; CHECK-P8-NEXT:    vsraw v3, v3, v5
377; CHECK-P8-NEXT:    vsraw v2, v2, v5
378; CHECK-P8-NEXT:    xvcvsxwsp v4, v4
379; CHECK-P8-NEXT:    xvcvsxwsp v5, v0
380; CHECK-P8-NEXT:    xvcvsxwsp v3, v3
381; CHECK-P8-NEXT:    xvcvsxwsp v2, v2
382; CHECK-P8-NEXT:    stvx v4, 0, r3
383; CHECK-P8-NEXT:    stvx v5, r3, r6
384; CHECK-P8-NEXT:    stvx v3, r3, r4
385; CHECK-P8-NEXT:    stvx v2, r3, r5
386; CHECK-P8-NEXT:    blr
387;
388; CHECK-P9-LABEL: test16elt_signed:
389; CHECK-P9:       # %bb.0: # %entry
390; CHECK-P9-NEXT:    lxv v3, 0(r4)
391; CHECK-P9-NEXT:    lxv v2, 16(r4)
392; CHECK-P9-NEXT:    vmrglh v4, v3, v3
393; CHECK-P9-NEXT:    vmrghh v3, v3, v3
394; CHECK-P9-NEXT:    vextsh2w v3, v3
395; CHECK-P9-NEXT:    vextsh2w v4, v4
396; CHECK-P9-NEXT:    xvcvsxwsp vs1, v3
397; CHECK-P9-NEXT:    vmrglh v3, v2, v2
398; CHECK-P9-NEXT:    vmrghh v2, v2, v2
399; CHECK-P9-NEXT:    xvcvsxwsp vs0, v4
400; CHECK-P9-NEXT:    vextsh2w v3, v3
401; CHECK-P9-NEXT:    vextsh2w v2, v2
402; CHECK-P9-NEXT:    xvcvsxwsp vs2, v3
403; CHECK-P9-NEXT:    xvcvsxwsp vs3, v2
404; CHECK-P9-NEXT:    stxv vs1, 16(r3)
405; CHECK-P9-NEXT:    stxv vs0, 0(r3)
406; CHECK-P9-NEXT:    stxv vs3, 48(r3)
407; CHECK-P9-NEXT:    stxv vs2, 32(r3)
408; CHECK-P9-NEXT:    blr
409;
410; CHECK-BE-LABEL: test16elt_signed:
411; CHECK-BE:       # %bb.0: # %entry
412; CHECK-BE-NEXT:    lxv v2, 16(r4)
413; CHECK-BE-NEXT:    lxv v3, 0(r4)
414; CHECK-BE-NEXT:    xxlxor v4, v4, v4
415; CHECK-BE-NEXT:    vmrglh v5, v4, v3
416; CHECK-BE-NEXT:    vmrglh v4, v4, v2
417; CHECK-BE-NEXT:    vmrghh v3, v3, v3
418; CHECK-BE-NEXT:    vmrghh v2, v2, v2
419; CHECK-BE-NEXT:    vextsh2w v5, v5
420; CHECK-BE-NEXT:    vextsh2w v4, v4
421; CHECK-BE-NEXT:    vextsh2w v3, v3
422; CHECK-BE-NEXT:    vextsh2w v2, v2
423; CHECK-BE-NEXT:    xvcvsxwsp vs0, v5
424; CHECK-BE-NEXT:    xvcvsxwsp vs1, v4
425; CHECK-BE-NEXT:    xvcvsxwsp vs2, v3
426; CHECK-BE-NEXT:    xvcvsxwsp vs3, v2
427; CHECK-BE-NEXT:    stxv vs1, 48(r3)
428; CHECK-BE-NEXT:    stxv vs3, 32(r3)
429; CHECK-BE-NEXT:    stxv vs0, 16(r3)
430; CHECK-BE-NEXT:    stxv vs2, 0(r3)
431; CHECK-BE-NEXT:    blr
432entry:
; %0 is the unnamed second parameter (the source vector pointer).
433  %a = load <16 x i16>, <16 x i16>* %0, align 32
; The operation under test: signed i16 -> float, lane by lane.
434  %1 = sitofp <16 x i16> %a to <16 x float>
; Store the 64-byte result into the caller-provided sret slot.
435  store <16 x float> %1, <16 x float>* %agg.result, align 64
436  ret void
437}
438