1; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
2; RUN:   -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
3
4; RUN: llc -verify-machineinstrs -mcpu=pwr9 \
5; RUN:   -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s
6
7@uca = global <16 x i8> zeroinitializer, align 16
8@ucb = global <16 x i8> zeroinitializer, align 16
9@sca = global <16 x i8> zeroinitializer, align 16
10@scb = global <16 x i8> zeroinitializer, align 16
11@usa = global <8 x i16> zeroinitializer, align 16
12@usb = global <8 x i16> zeroinitializer, align 16
13@ssa = global <8 x i16> zeroinitializer, align 16
14@ssb = global <8 x i16> zeroinitializer, align 16
15@uia = global <4 x i32> zeroinitializer, align 16
16@uib = global <4 x i32> zeroinitializer, align 16
17@sia = global <4 x i32> zeroinitializer, align 16
18@sib = global <4 x i32> zeroinitializer, align 16
19@ulla = global <2 x i64> zeroinitializer, align 16
20@ullb = global <2 x i64> zeroinitializer, align 16
21@slla = global <2 x i64> zeroinitializer, align 16
22@sllb = global <2 x i64> zeroinitializer, align 16
23@uxa = global <1 x i128> zeroinitializer, align 16
24@uxb = global <1 x i128> zeroinitializer, align 16
25@sxa = global <1 x i128> zeroinitializer, align 16
26@sxb = global <1 x i128> zeroinitializer, align 16
27@vfa = global <4 x float> zeroinitializer, align 16
28@vfb = global <4 x float> zeroinitializer, align 16
29@vda = global <2 x double> zeroinitializer, align 16
30@vdb = global <2 x double> zeroinitializer, align 16
31
; Exercises one vector add per element type on POWER9. For each type the test
; expects: both operands loaded from globals with the D-form lxv, the add
; selected to the matching Altivec/VSX instruction (vaddubm / vadduhm /
; vadduwm / vaddudm / vadduqm for integers, xvaddsp / xvadddp for FP), and the
; result stored with the D-form stxv before being passed to the external
; @sink, which keeps the value live across the call.
define void @_Z4testv() {
entry:
; CHECK-LABEL: @_Z4testv
  %0 = load <16 x i8>, <16 x i8>* @uca, align 16
  %1 = load <16 x i8>, <16 x i8>* @ucb, align 16
  %add.i = add <16 x i8> %1, %0
  tail call void (...) @sink(<16 x i8> %add.i)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %2 = load <16 x i8>, <16 x i8>* @sca, align 16
  %3 = load <16 x i8>, <16 x i8>* @scb, align 16
  %add.i22 = add <16 x i8> %3, %2
  tail call void (...) @sink(<16 x i8> %add.i22)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vaddubm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %4 = load <8 x i16>, <8 x i16>* @usa, align 16
  %5 = load <8 x i16>, <8 x i16>* @usb, align 16
  %add.i21 = add <8 x i16> %5, %4
  tail call void (...) @sink(<8 x i16> %add.i21)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %6 = load <8 x i16>, <8 x i16>* @ssa, align 16
  %7 = load <8 x i16>, <8 x i16>* @ssb, align 16
  %add.i20 = add <8 x i16> %7, %6
  tail call void (...) @sink(<8 x i16> %add.i20)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vadduhm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %8 = load <4 x i32>, <4 x i32>* @uia, align 16
  %9 = load <4 x i32>, <4 x i32>* @uib, align 16
  %add.i19 = add <4 x i32> %9, %8
  tail call void (...) @sink(<4 x i32> %add.i19)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %10 = load <4 x i32>, <4 x i32>* @sia, align 16
  %11 = load <4 x i32>, <4 x i32>* @sib, align 16
  %add.i18 = add <4 x i32> %11, %10
  tail call void (...) @sink(<4 x i32> %add.i18)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vadduwm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %12 = load <2 x i64>, <2 x i64>* @ulla, align 16
  %13 = load <2 x i64>, <2 x i64>* @ullb, align 16
  %add.i17 = add <2 x i64> %13, %12
  tail call void (...) @sink(<2 x i64> %add.i17)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %14 = load <2 x i64>, <2 x i64>* @slla, align 16
  %15 = load <2 x i64>, <2 x i64>* @sllb, align 16
  %add.i16 = add <2 x i64> %15, %14
  tail call void (...) @sink(<2 x i64> %add.i16)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vaddudm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %16 = load <1 x i128>, <1 x i128>* @uxa, align 16
  %17 = load <1 x i128>, <1 x i128>* @uxb, align 16
  %add.i15 = add <1 x i128> %17, %16
  tail call void (...) @sink(<1 x i128> %add.i15)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %18 = load <1 x i128>, <1 x i128>* @sxa, align 16
  %19 = load <1 x i128>, <1 x i128>* @sxb, align 16
  %add.i14 = add <1 x i128> %19, %18
  tail call void (...) @sink(<1 x i128> %add.i14)
; CHECK: lxv 34, 0(3)
; CHECK: lxv 35, 0(3)
; CHECK: vadduqm 2, 3, 2
; CHECK: stxv 34,
; CHECK: bl sink
  %20 = load <4 x float>, <4 x float>* @vfa, align 16
  %21 = load <4 x float>, <4 x float>* @vfb, align 16
  %add.i13 = fadd <4 x float> %20, %21
  tail call void (...) @sink(<4 x float> %add.i13)
; CHECK: lxv 0, 0(3)
; CHECK: lxv 1, 0(3)
; CHECK: xvaddsp 34, 0, 1
; CHECK: stxv 34,
; CHECK: bl sink
  %22 = load <2 x double>, <2 x double>* @vda, align 16
  %23 = load <2 x double>, <2 x double>* @vdb, align 16
  %add.i12 = fadd <2 x double> %22, %23
  tail call void (...) @sink(<2 x double> %add.i12)
; CHECK: lxv 0, 0(3)
; CHECK: lxv 1, 0(3)
; CHECK: xvadddp 0, 0, 1
; CHECK: stxv 0,
; CHECK: bl sink
  ret void
}
145
146; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.xviexpsp intrinsic is selected directly to a single
; xviexpsp instruction.
define <4 x float> @testXVIEXPSP(<4 x i32> %a, <4 x i32> %b) {
entry:
  %0 = tail call <4 x float> @llvm.ppc.vsx.xviexpsp(<4 x i32> %a, <4 x i32> %b)
  ret <4 x float> %0
; CHECK-LABEL: testXVIEXPSP
; CHECK: xviexpsp 34, 34, 35
; CHECK: blr
}
155; Function Attrs: nounwind readnone
156declare <4 x float> @llvm.ppc.vsx.xviexpsp(<4 x i32>, <4 x i32>)
157
158; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.xviexpdp intrinsic is selected directly to a single
; xviexpdp instruction.
define <2 x double> @testXVIEXPDP(<2 x i64> %a, <2 x i64> %b) {
entry:
  %0 = tail call <2 x double> @llvm.ppc.vsx.xviexpdp(<2 x i64> %a, <2 x i64> %b)
  ret <2 x double> %0
; CHECK-LABEL: testXVIEXPDP
; CHECK: xviexpdp 34, 34, 35
; CHECK: blr
}
167; Function Attrs: nounwind readnone
168declare <2 x double> @llvm.ppc.vsx.xviexpdp(<2 x i64>, <2 x i64>)
169
; Verify the llvm.ppc.altivec.vslv intrinsic is selected directly to a single
; vslv instruction.
define <16 x i8> @testVSLV(<16 x i8> %a, <16 x i8> %b) {
entry:
  %0 = tail call <16 x i8> @llvm.ppc.altivec.vslv(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
; CHECK-LABEL: testVSLV
; CHECK: vslv 2, 2, 3
; CHECK: blr
}
178; Function Attrs: nounwind readnone
179declare <16 x i8> @llvm.ppc.altivec.vslv(<16 x i8>, <16 x i8>)
180
181; Function Attrs: nounwind readnone
; Verify the llvm.ppc.altivec.vsrv intrinsic is selected directly to a single
; vsrv instruction.
define <16 x i8> @testVSRV(<16 x i8> %a, <16 x i8> %b) {
entry:
  %0 = tail call <16 x i8> @llvm.ppc.altivec.vsrv(<16 x i8> %a, <16 x i8> %b)
  ret <16 x i8> %0
; CHECK-LABEL: testVSRV
; CHECK: vsrv 2, 2, 3
; CHECK: blr
}
190; Function Attrs: nounwind readnone
191declare <16 x i8> @llvm.ppc.altivec.vsrv(<16 x i8>, <16 x i8>)
192
193; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.xvcvsphp intrinsic is selected to a single xvcvsphp
; instruction, and that the following bitcast to <8 x i16> costs no extra code.
define <8 x i16> @testXVCVSPHP(<4 x float> %a) {
entry:
; CHECK-LABEL: testXVCVSPHP
; CHECK: xvcvsphp 34, 34
; CHECK: blr
  %0 = tail call <4 x float> @llvm.ppc.vsx.xvcvsphp(<4 x float> %a)
  %1 = bitcast <4 x float> %0 to <8 x i16>
  ret <8 x i16> %1
}
203
204; Function Attrs: nounwind readnone
; Verify the llvm.ppc.altivec.vrlwmi intrinsic is selected to a single vrlwmi
; instruction. Note the deliberate argument order (%a, %c, %b): the test also
; pins which IR operand ends up as the tied in/out register (v3 here) versus
; the rotate-source (v2) and control operand (v4).
define <4 x i32> @testVRLWMI(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
entry:
; CHECK-LABEL: testVRLWMI
; CHECK: vrlwmi 3, 2, 4
; CHECK: blr
  %0 = tail call <4 x i32> @llvm.ppc.altivec.vrlwmi(<4 x i32> %a, <4 x i32> %c, <4 x i32> %b)
  ret <4 x i32> %0
}
213
214; Function Attrs: nounwind readnone
; Verify the llvm.ppc.altivec.vrldmi intrinsic is selected to a single vrldmi
; instruction; as in testVRLWMI, the swapped (%a, %c, %b) argument order pins
; the operand-to-register mapping checked below.
define <2 x i64> @testVRLDMI(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
entry:
; CHECK-LABEL: testVRLDMI
; CHECK: vrldmi 3, 2, 4
; CHECK: blr
  %0 = tail call <2 x i64> @llvm.ppc.altivec.vrldmi(<2 x i64> %a, <2 x i64> %c, <2 x i64> %b)
  ret <2 x i64> %0
}
223
224; Function Attrs: nounwind readnone
; Verify the llvm.ppc.altivec.vrlwnm intrinsic is selected to vrlwnm and that
; the separate IR `and` with %c is emitted as a following xxland (i.e. the
; mask is not folded away).
define <4 x i32> @testVRLWNM(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.altivec.vrlwnm(<4 x i32> %a, <4 x i32> %b)
  %and.i = and <4 x i32> %0, %c
  ret <4 x i32> %and.i
; CHECK-LABEL: testVRLWNM
; CHECK: vrlwnm 2, 2, 3
; CHECK: xxland 34, 34, 36
; CHECK: blr
}
235
236; Function Attrs: nounwind readnone
; Verify the llvm.ppc.altivec.vrldnm intrinsic is selected to vrldnm followed
; by an xxland for the explicit IR `and` with %c.
define <2 x i64> @testVRLDNM(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.altivec.vrldnm(<2 x i64> %a, <2 x i64> %b)
  %and.i = and <2 x i64> %0, %c
  ret <2 x i64> %and.i
; CHECK-LABEL: testVRLDNM
; CHECK: vrldnm 2, 2, 3
; CHECK: xxland 34, 34, 36
; CHECK: blr
}
247
248; Function Attrs: nounwind readnone
249declare <4 x float> @llvm.ppc.vsx.xvcvsphp(<4 x float>)
250
251; Function Attrs: nounwind readnone
252declare <4 x i32> @llvm.ppc.altivec.vrlwmi(<4 x i32>, <4 x i32>, <4 x i32>)
253
254; Function Attrs: nounwind readnone
255declare <2 x i64> @llvm.ppc.altivec.vrldmi(<2 x i64>, <2 x i64>, <2 x i64>)
256
257; Function Attrs: nounwind readnone
258declare <4 x i32> @llvm.ppc.altivec.vrlwnm(<4 x i32>, <4 x i32>)
259
260; Function Attrs: nounwind readnone
261declare <2 x i64> @llvm.ppc.altivec.vrldnm(<2 x i64>, <2 x i64>)
262
; Verify the llvm.ppc.vsx.xvxexpsp intrinsic is selected directly to a single
; xvxexpsp instruction.
define <4 x i32> @testXVXEXPSP(<4 x float> %a) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.xvxexpsp(<4 x float> %a)
  ret <4 x i32> %0
; CHECK-LABEL: testXVXEXPSP
; CHECK: xvxexpsp 34, 34
; CHECK: blr
}
271; Function Attrs: nounwind readnone
272declare <4 x i32> @llvm.ppc.vsx.xvxexpsp(<4 x float>)
273
274; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.xvxexpdp intrinsic is selected directly to a single
; xvxexpdp instruction.
define <2 x i64> @testXVXEXPDP(<2 x double> %a) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.vsx.xvxexpdp(<2 x double> %a)
  ret <2 x i64> %0
; CHECK-LABEL: testXVXEXPDP
; CHECK: xvxexpdp 34, 34
; CHECK: blr
}
283; Function Attrs: nounwind readnone
284declare <2 x i64>@llvm.ppc.vsx.xvxexpdp(<2 x double>)
285
286; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.xvxsigsp intrinsic is selected directly to a single
; xvxsigsp instruction.
define <4 x i32> @testXVXSIGSP(<4 x float> %a) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.xvxsigsp(<4 x float> %a)
  ret <4 x i32> %0
; CHECK-LABEL: testXVXSIGSP
; CHECK: xvxsigsp 34, 34
; CHECK: blr
}
295; Function Attrs: nounwind readnone
296declare <4 x i32> @llvm.ppc.vsx.xvxsigsp(<4 x float>)
297
298; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.xvxsigdp intrinsic is selected directly to a single
; xvxsigdp instruction.
define <2 x i64> @testXVXSIGDP(<2 x double> %a) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.vsx.xvxsigdp(<2 x double> %a)
  ret <2 x i64> %0
; CHECK-LABEL: testXVXSIGDP
; CHECK: xvxsigdp 34, 34
; CHECK: blr
}
307; Function Attrs: nounwind readnone
308declare <2 x i64> @llvm.ppc.vsx.xvxsigdp(<2 x double>)
309
310; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.xvtstdcsp intrinsic is selected to a single
; xvtstdcsp and that the constant mask (127) is encoded as the immediate.
define <4 x i32> @testXVTSTDCSP(<4 x float> %a) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.xvtstdcsp(<4 x float> %a, i32 127)
  ret <4 x i32> %0
; CHECK-LABEL: testXVTSTDCSP
; CHECK: xvtstdcsp 34, 34, 127
; CHECK: blr
}
319; Function Attrs: nounwind readnone
320declare <4 x i32> @llvm.ppc.vsx.xvtstdcsp(<4 x float> %a, i32 %b)
321
322; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.xvtstdcdp intrinsic is selected to a single
; xvtstdcdp and that the constant mask (127) is encoded as the immediate.
define <2 x i64> @testXVTSTDCDP(<2 x double> %a) {
entry:
  %0 = tail call <2 x i64> @llvm.ppc.vsx.xvtstdcdp(<2 x double> %a, i32 127)
  ret <2 x i64> %0
; CHECK-LABEL: testXVTSTDCDP
; CHECK: xvtstdcdp 34, 34, 127
; CHECK: blr
}
331; Function Attrs: nounwind readnone
332declare <2 x i64> @llvm.ppc.vsx.xvtstdcdp(<2 x double> %a, i32 %b)
333
; Verify the llvm.ppc.vsx.xvcvhpsp intrinsic is selected directly to a single
; xvcvhpsp instruction.
define <4 x float> @testXVCVHPSP(<8 x i16> %a) {
entry:
  %0 = tail call <4 x float>@llvm.ppc.vsx.xvcvhpsp(<8 x i16> %a)
  ret <4 x float> %0
; CHECK-LABEL: testXVCVHPSP
; CHECK: xvcvhpsp 34, 34
; CHECK: blr
}
342; Function Attrs: nounwind readnone
343declare <4 x float>@llvm.ppc.vsx.xvcvhpsp(<8 x i16>)
344
345; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.lxvl (load-with-length) intrinsic is selected to a
; single lxvl taking the pointer and length from the argument GPRs (r3, r4).
define <4 x i32> @testLXVL(i8* %a, i64 %b) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.lxvl(i8* %a, i64 %b)
  ret <4 x i32> %0
; CHECK-LABEL: testLXVL
; CHECK: lxvl 34, 3, 4
; CHECK: blr
}
354; Function Attrs: nounwind readnone
355declare <4 x i32> @llvm.ppc.vsx.lxvl(i8*, i64)
356
; Verify the llvm.ppc.vsx.stxvl (store-with-length) intrinsic is selected to a
; single stxvl taking the pointer and length from the argument GPRs (r5, r6).
define void @testSTXVL(<4 x i32> %a, i8* %b, i64 %c) {
entry:
  tail call void @llvm.ppc.vsx.stxvl(<4 x i32> %a, i8* %b, i64 %c)
  ret void
; CHECK-LABEL: testSTXVL
; CHECK: stxvl 34, 5, 6
; CHECK: blr
}
365; Function Attrs: nounwind readnone
366declare void @llvm.ppc.vsx.stxvl(<4 x i32>, i8*, i64)
367
368; Function Attrs: nounwind readnone
; Verify the llvm.ppc.vsx.lxvll intrinsic (left-justified load-with-length) is
; selected to a single lxvll instruction.
define <4 x i32> @testLXVLL(i8* %a, i64 %b) {
entry:
  %0 = tail call <4 x i32> @llvm.ppc.vsx.lxvll(i8* %a, i64 %b)
  ret <4 x i32> %0
; CHECK-LABEL: testLXVLL
; CHECK: lxvll 34, 3, 4
; CHECK: blr
}
377; Function Attrs: nounwind readnone
378declare <4 x i32> @llvm.ppc.vsx.lxvll(i8*, i64)
379
; Verify the llvm.ppc.vsx.stxvll intrinsic (left-justified store-with-length)
; is selected to a single stxvll instruction.
define void @testSTXVLL(<4 x i32> %a, i8* %b, i64 %c) {
entry:
  tail call void @llvm.ppc.vsx.stxvll(<4 x i32> %a, i8* %b, i64 %c)
  ret void
; CHECK-LABEL: testSTXVLL
; CHECK: stxvll 34, 5, 6
; CHECK: blr
}
388; Function Attrs: nounwind readnone
389declare void @llvm.ppc.vsx.stxvll(<4 x i32>, i8*, i64)
390
; Verify that subtracting a <4 x i32> from zero is selected to the single
; ISA 3.0 vnegw (vector negate word) instruction rather than a splat-zero
; plus vsubuwm sequence.
define <4 x i32> @test0(<4 x i32> %a) local_unnamed_addr #0 {
entry:
  %sub.i = sub <4 x i32> zeroinitializer, %a
  ret <4 x i32> %sub.i

; CHECK-LABEL: @test0
; CHECK: vnegw 2, 2
; CHECK: blr

}
401
; Verify that subtracting a <2 x i64> from zero is selected to the single
; ISA 3.0 vnegd (vector negate doubleword) instruction.
define <2 x i64> @test1(<2 x i64> %a) local_unnamed_addr #0 {
entry:
  %sub.i = sub <2 x i64> zeroinitializer, %a
  ret <2 x i64> %sub.i

; CHECK-LABEL: @test1
; CHECK: vnegd 2, 2
; CHECK: blr

}
412
413declare void @sink(...)
414
415; stack object should be accessed using D-form load/store instead of X-form
; A 16-byte memset of a stack array: the zero store to the known stack slot
; should use the D-form stxv (register + immediate offset from r1), never the
; X-form stxvx.
define signext i32 @func1() {
; CHECK-LABEL: @func1
; CHECK-NOT: stxvx
; CHECK: stxv {{[0-9]+}}, {{[0-9]+}}(1)
; CHECK-NOT: stxvx
; CHECK: blr
entry:
  %a = alloca [4 x i32], align 4
  %0 = bitcast [4 x i32]* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 16, i1 false)
  %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %a, i64 0, i64 0
  %call = call signext i32 @callee(i32* nonnull %arraydecay) #3
  ret i32 %call
}
430
431; stack object should be accessed using D-form load/store instead of X-form
; A 64-byte memset of a stack array: all four 16-byte zero stores should use
; the D-form stxv with immediate offsets from r1 (no stxvx), and should all
; reuse the same zeroed source register ([[ZEROREG]]).
define signext i32 @func2() {
; CHECK-LABEL: @func2
; CHECK-NOT: stxvx
; CHECK: stxv [[ZEROREG:[0-9]+]], {{[0-9]+}}(1)
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
; CHECK: stxv [[ZEROREG]], {{[0-9]+}}(1)
; CHECK-NOT: stxvx
; CHECK: blr
entry:
  %a = alloca [16 x i32], align 4
  %0 = bitcast [16 x i32]* %a to i8*
  call void @llvm.memset.p0i8.i64(i8* nonnull align 4 %0, i8 0, i64 64, i1 false)
  %arraydecay = getelementptr inbounds [16 x i32], [16 x i32]* %a, i64 0, i64 0
  %call = call signext i32 @callee(i32* nonnull %arraydecay) #3
  ret i32 %call
}
449
450declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1) #1
451declare signext i32 @callee(i32*) local_unnamed_addr #2
452