1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
3; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
4; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
5; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -enable-soft-fp128 | FileCheck %s \
6; RUN:   -check-prefix=CHECK-P8
7
8; Function Attrs: norecurse nounwind
define dso_local void @qpAdd(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpAdd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsaddqp v2, v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpAdd:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    bl __addkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; fadd of fp128 lowers to a single xsaddqp on pwr9, and to the __addkf3
; compiler-rt libcall on pwr8 with -enable-soft-fp128.
entry:
  %0 = load fp128, fp128* %a, align 16
  %add = fadd fp128 %0, %0
  store fp128 %add, fp128* %res, align 16
  ret void
}
43
44; Function Attrs: norecurse nounwind
define dso_local void @qpSub(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpSub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xssubqp v2, v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpSub:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    bl __subkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; fsub of fp128 lowers to xssubqp on pwr9, and to the __subkf3 libcall on pwr8.
entry:
  %0 = load fp128, fp128* %a, align 16
  %sub = fsub fp128 %0, %0
  store fp128 %sub, fp128* %res, align 16
  ret void
}
79
80; Function Attrs: norecurse nounwind
define dso_local void @qpMul(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpMul:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsmulqp v2, v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpMul:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; fmul of fp128 lowers to xsmulqp on pwr9, and to the __mulkf3 libcall on pwr8.
entry:
  %0 = load fp128, fp128* %a, align 16
  %mul = fmul fp128 %0, %0
  store fp128 %mul, fp128* %res, align 16
  ret void
}
115
116; Function Attrs: norecurse nounwind
define dso_local void @qpDiv(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpDiv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsdivqp v2, v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpDiv:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    bl __divkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; fdiv of fp128 lowers to xsdivqp on pwr9, and to the __divkf3 libcall on pwr8.
entry:
  %0 = load fp128, fp128* %a, align 16
  %div = fdiv fp128 %0, %0
  store fp128 %div, fp128* %res, align 16
  ret void
}
151
define dso_local void @testLdNSt(i8* nocapture readonly %PtrC, fp128* nocapture %PtrF) {
; CHECK-LABEL: testLdNSt:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    addi r3, r3, 4
; CHECK-NEXT:    addi r4, r4, 8
; CHECK-NEXT:    lxvx vs0, 0, r3
; CHECK-NEXT:    stxvx vs0, 0, r4
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: testLdNSt:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    addi r3, r3, 4
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    addi r3, r4, 8
; CHECK-P8-NEXT:    stvx v2, 0, r3
; CHECK-P8-NEXT:    blr
; fp128 load/store at small byte offsets: both targets materialize the
; offset with addi and use indexed vector load/store (lxvx/stxvx vs lvx/stvx).
entry:
  %add.ptr = getelementptr inbounds i8, i8* %PtrC, i64 4
  %0 = bitcast i8* %add.ptr to fp128*
  %1 = load fp128, fp128* %0, align 16
  %2 = bitcast fp128* %PtrF to i8*
  %add.ptr1 = getelementptr inbounds i8, i8* %2, i64 8
  %3 = bitcast i8* %add.ptr1 to fp128*
  store fp128 %1, fp128* %3, align 16
  ret void
}
178
define dso_local void @qpSqrt(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpSqrt:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xssqrtqp v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpSqrt:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    bl sqrtf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.sqrt.f128 lowers to xssqrtqp on pwr9, and to a sqrtf128 call on pwr8.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.sqrt.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void

}
declare fp128 @llvm.sqrt.f128(fp128 %Val)
214
define dso_local void @qpCpsgn(fp128* nocapture readonly %a, fp128* nocapture readonly %b,
; CHECK-LABEL: qpCpsgn:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    xscpsgnqp v2, v3, v2
; CHECK-NEXT:    stxv v2, 0(r5)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpCpsgn:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    addi r3, r1, -16
; CHECK-P8-NEXT:    addi r4, r1, -32
; CHECK-P8-NEXT:    stvx v3, 0, r3
; CHECK-P8-NEXT:    stvx v2, 0, r4
; CHECK-P8-NEXT:    lbz r3, -1(r1)
; CHECK-P8-NEXT:    lbz r6, -17(r1)
; CHECK-P8-NEXT:    rlwimi r6, r3, 0, 0, 24
; CHECK-P8-NEXT:    stb r6, -17(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r4
; CHECK-P8-NEXT:    stvx v2, 0, r5
; CHECK-P8-NEXT:    blr
                     fp128* nocapture %res) {
; llvm.copysign.f128 lowers to xscpsgnqp on pwr9; on pwr8 it is done by
; spilling both values to the stack and splicing the sign byte with rlwimi.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.copysign.f128(fp128 %0, fp128 %1)
  store fp128 %2, fp128* %res, align 16
  ret void

}
declare fp128 @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn)
249
define dso_local void @qpAbs(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpAbs:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsabsqp v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpAbs:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    addi r3, r1, -16
; CHECK-P8-NEXT:    stvx v2, 0, r3
; CHECK-P8-NEXT:    lbz r5, -1(r1)
; CHECK-P8-NEXT:    clrlwi r5, r5, 25
; CHECK-P8-NEXT:    stb r5, -1(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    stvx v2, 0, r4
; CHECK-P8-NEXT:    blr
; llvm.fabs.f128 lowers to xsabsqp on pwr9; on pwr8 the sign bit is cleared
; in the spilled top byte with clrlwi.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void

}
declare fp128 @llvm.fabs.f128(fp128 %Val)
277
define dso_local void @qpNAbs(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNAbs:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsnabsqp v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpNAbs:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    addi r3, r1, -32
; CHECK-P8-NEXT:    stvx v2, 0, r3
; CHECK-P8-NEXT:    lbz r5, -17(r1)
; CHECK-P8-NEXT:    clrlwi r5, r5, 25
; CHECK-P8-NEXT:    stb r5, -17(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    addi r3, r1, -16
; CHECK-P8-NEXT:    stvx v2, 0, r3
; CHECK-P8-NEXT:    lbz r5, -1(r1)
; CHECK-P8-NEXT:    xori r5, r5, 128
; CHECK-P8-NEXT:    stb r5, -1(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    stvx v2, 0, r4
; CHECK-P8-NEXT:    blr
; fabs followed by (-0.0 - x) matches the negated-abs pattern: a single
; xsnabsqp on pwr9, and clrlwi (clear sign) then xori 128 (flip sign) on pwr8.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
  %neg = fsub fp128 0xL00000000000000008000000000000000, %1
  store fp128 %neg, fp128* %res, align 16
  ret void

}
311
define dso_local void @qpNeg(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNeg:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsnegqp v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpNeg:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    addi r3, r1, -16
; CHECK-P8-NEXT:    stvx v2, 0, r3
; CHECK-P8-NEXT:    lbz r5, -1(r1)
; CHECK-P8-NEXT:    xori r5, r5, 128
; CHECK-P8-NEXT:    stb r5, -1(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    stvx v2, 0, r4
; CHECK-P8-NEXT:    blr
; (-0.0 - x) matches fneg: xsnegqp on pwr9; on pwr8 the sign bit of the
; spilled value is flipped with xori 128.
entry:
  %0 = load fp128, fp128* %a, align 16
  %sub = fsub fp128 0xL00000000000000008000000000000000, %0
  store fp128 %sub, fp128* %res, align 16
  ret void

}
338
define fp128 @qp_sin(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_sin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl sinf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_sin:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    bl sinf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.sin.f128 has no hardware instruction: both targets call sinf128.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.sin.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.sin.f128(fp128 %Val)
375
define fp128 @qp_cos(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_cos:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl cosf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_cos:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    bl cosf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.cos.f128 lowers to a cosf128 call on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.cos.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.cos.f128(fp128 %Val)
412
define fp128 @qp_log(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl logf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_log:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    bl logf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.log.f128 lowers to a logf128 call on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.log.f128(fp128 %Val)
449
define fp128 @qp_log10(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl log10f128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_log10:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    bl log10f128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.log10.f128 lowers to a log10f128 call on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log10.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.log10.f128(fp128 %Val)
486
define fp128 @qp_log2(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl log2f128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_log2:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    bl log2f128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.log2.f128 lowers to a log2f128 call on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log2.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.log2.f128(fp128 %Val)
523
define fp128 @qp_minnum(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_minnum:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    bl fminf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_minnum:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    bl fminf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                        fp128* nocapture readonly %b) {
; llvm.minnum.f128 lowers to an fminf128 call on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.minnum.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}
declare fp128     @llvm.minnum.f128(fp128 %Val0, fp128 %Val1)
564
define fp128 @qp_maxnum(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_maxnum:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    bl fmaxf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_maxnum:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    bl fmaxf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                        fp128* nocapture readonly %b) {
; llvm.maxnum.f128 lowers to an fmaxf128 call on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.maxnum.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}
declare fp128     @llvm.maxnum.f128(fp128 %Val0, fp128 %Val1)
605
define fp128 @qp_pow(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_pow:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    bl powf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_pow:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    bl powf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                     fp128* nocapture readonly %b) {
; llvm.pow.f128 lowers to a powf128 call on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.pow.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}
declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power)
646
define fp128 @qp_exp(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_exp:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl expf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_exp:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    bl expf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.exp.f128 lowers to an expf128 call on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.exp.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.exp.f128(fp128 %Val)
683
define fp128 @qp_exp2(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_exp2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl exp2f128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_exp2:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    bl exp2f128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.exp2.f128 lowers to an exp2f128 call on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.exp2.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.exp2.f128(fp128 %Val)
720
define dso_local void @qp_powi(fp128* nocapture readonly %a, i32* nocapture readonly %b,
; CHECK-LABEL: qp_powi:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -48(r1)
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    mr r30, r5
; CHECK-NEXT:    lwz r5, 0(r4)
; CHECK-NEXT:    bl __powikf2
; CHECK-NEXT:    nop
; CHECK-NEXT:    stxv v2, 0(r30)
; CHECK-NEXT:    addi r1, r1, 48
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_powi:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    lwz r3, 0(r4)
; CHECK-P8-NEXT:    mr r30, r5
; CHECK-P8-NEXT:    mr r5, r3
; CHECK-P8-NEXT:    bl __powikf2
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                     fp128* nocapture %res) {
; llvm.powi.f128 (fp128 base, i32 exponent) lowers to the __powikf2
; compiler-rt libcall on both targets.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load i32, i32* %b, align 8
  %2 = tail call fp128 @llvm.powi.f128(fp128 %0, i32 %1)
  store fp128 %2, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.powi.f128(fp128 %Val, i32 %power)
773
774@a = common dso_local global fp128 0xL00000000000000000000000000000000, align 16
775@b = common dso_local global fp128 0xL00000000000000000000000000000000, align 16
776
define fp128 @qp_frem() #0 {
; CHECK-LABEL: qp_frem:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    addis r3, r2, a@toc@ha
; CHECK-NEXT:    addi r3, r3, a@toc@l
; CHECK-NEXT:    lxvx v2, 0, r3
; CHECK-NEXT:    addis r3, r2, b@toc@ha
; CHECK-NEXT:    addi r3, r3, b@toc@l
; CHECK-NEXT:    lxvx v3, 0, r3
; CHECK-NEXT:    bl fmodf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_frem:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    addis r3, r2, a@toc@ha
; CHECK-P8-NEXT:    addis r4, r2, b@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, a@toc@l
; CHECK-P8-NEXT:    addi r4, r4, b@toc@l
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    lvx v3, 0, r4
; CHECK-P8-NEXT:    bl fmodf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; frem of fp128 (operands loaded from globals @a/@b via the TOC) lowers to
; an fmodf128 call on both targets.
entry:
  %0 = load fp128, fp128* @a, align 16
  %1 = load fp128, fp128* @b, align 16
  %rem = frem fp128 %0, %1
  ret fp128 %rem
}
823
define dso_local void @qpCeil(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpCeil:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpi 1, v2, v2, 2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpCeil:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    bl ceilf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.ceil.f128 lowers to a single xsrqpi (round-to-quad-precision-integer)
; on pwr9, and to a ceilf128 call on pwr8.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.ceil.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.ceil.f128(fp128 %Val)
858
define dso_local void @qpFloor(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpFloor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpi 1, v2, v2, 3
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFloor:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    bl floorf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.floor.f128 lowers to a single xsrqpi on pwr9, and to a floorf128
; call on pwr8.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.floor.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.floor.f128(fp128 %Val)
893
define dso_local void @qpTrunc(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpTrunc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpi 1, v2, v2, 1
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpTrunc:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    bl truncf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.trunc.f128 lowers to a single xsrqpi on pwr9, and to a truncf128
; call on pwr8.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.trunc.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.trunc.f128(fp128 %Val)
928
define dso_local void @qpRound(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpRound:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpi 0, v2, v2, 0
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpRound:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lvx v2, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    bl roundf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stvx v2, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
; llvm.round.f128 lowers to a single xsrqpi on pwr9, and to a roundf128
; call on pwr8.
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.round.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.round.f128(fp128 %Val)
963
; llvm.lround.f128: there is no inline lowering on either subtarget — both pwr9
; and pwr8 emit an lroundf128 libcall. The i32 result arrives in r3 and is stored
; with stw through r30 (the saved %res pointer).
964define dso_local void @qpLRound(fp128* nocapture readonly %a, i32* nocapture %res) {
965; CHECK-LABEL: qpLRound:
966; CHECK:       # %bb.0: # %entry
967; CHECK-NEXT:    mflr r0
968; CHECK-NEXT:    .cfi_def_cfa_offset 48
969; CHECK-NEXT:    .cfi_offset lr, 16
970; CHECK-NEXT:    .cfi_offset r30, -16
971; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
972; CHECK-NEXT:    std r0, 16(r1)
973; CHECK-NEXT:    stdu r1, -48(r1)
974; CHECK-NEXT:    lxv v2, 0(r3)
975; CHECK-NEXT:    mr r30, r4
976; CHECK-NEXT:    bl lroundf128
977; CHECK-NEXT:    nop
978; CHECK-NEXT:    stw r3, 0(r30)
979; CHECK-NEXT:    addi r1, r1, 48
980; CHECK-NEXT:    ld r0, 16(r1)
981; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
982; CHECK-NEXT:    mtlr r0
983; CHECK-NEXT:    blr
984;
985; CHECK-P8-LABEL: qpLRound:
986; CHECK-P8:       # %bb.0: # %entry
987; CHECK-P8-NEXT:    mflr r0
988; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
989; CHECK-P8-NEXT:    .cfi_offset lr, 16
990; CHECK-P8-NEXT:    .cfi_offset r30, -16
991; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
992; CHECK-P8-NEXT:    std r0, 16(r1)
993; CHECK-P8-NEXT:    stdu r1, -48(r1)
994; CHECK-P8-NEXT:    lvx v2, 0, r3
995; CHECK-P8-NEXT:    mr r30, r4
996; CHECK-P8-NEXT:    bl lroundf128
997; CHECK-P8-NEXT:    nop
998; CHECK-P8-NEXT:    stw r3, 0(r30)
999; CHECK-P8-NEXT:    addi r1, r1, 48
1000; CHECK-P8-NEXT:    ld r0, 16(r1)
1001; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1002; CHECK-P8-NEXT:    mtlr r0
1003; CHECK-P8-NEXT:    blr
1004entry:
1005  %0 = load fp128, fp128* %a, align 16
1006  %1 = tail call i32 @llvm.lround.f128(fp128 %0)
1007  store i32 %1, i32* %res, align 16
1008  ret void
1009}
1010declare i32 @llvm.lround.f128(fp128 %Val)
1011
; llvm.llround.f128: no inline lowering on either subtarget — both pwr9 and pwr8
; emit an llroundf128 libcall. Unlike qpLRound, the result here is i64, so it is
; stored with std (8-byte store) rather than stw.
1012define dso_local void @qpLLRound(fp128* nocapture readonly %a, i64* nocapture %res) {
1013; CHECK-LABEL: qpLLRound:
1014; CHECK:       # %bb.0: # %entry
1015; CHECK-NEXT:    mflr r0
1016; CHECK-NEXT:    .cfi_def_cfa_offset 48
1017; CHECK-NEXT:    .cfi_offset lr, 16
1018; CHECK-NEXT:    .cfi_offset r30, -16
1019; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1020; CHECK-NEXT:    std r0, 16(r1)
1021; CHECK-NEXT:    stdu r1, -48(r1)
1022; CHECK-NEXT:    lxv v2, 0(r3)
1023; CHECK-NEXT:    mr r30, r4
1024; CHECK-NEXT:    bl llroundf128
1025; CHECK-NEXT:    nop
1026; CHECK-NEXT:    std r3, 0(r30)
1027; CHECK-NEXT:    addi r1, r1, 48
1028; CHECK-NEXT:    ld r0, 16(r1)
1029; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1030; CHECK-NEXT:    mtlr r0
1031; CHECK-NEXT:    blr
1032;
1033; CHECK-P8-LABEL: qpLLRound:
1034; CHECK-P8:       # %bb.0: # %entry
1035; CHECK-P8-NEXT:    mflr r0
1036; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1037; CHECK-P8-NEXT:    .cfi_offset lr, 16
1038; CHECK-P8-NEXT:    .cfi_offset r30, -16
1039; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1040; CHECK-P8-NEXT:    std r0, 16(r1)
1041; CHECK-P8-NEXT:    stdu r1, -48(r1)
1042; CHECK-P8-NEXT:    lvx v2, 0, r3
1043; CHECK-P8-NEXT:    mr r30, r4
1044; CHECK-P8-NEXT:    bl llroundf128
1045; CHECK-P8-NEXT:    nop
1046; CHECK-P8-NEXT:    std r3, 0(r30)
1047; CHECK-P8-NEXT:    addi r1, r1, 48
1048; CHECK-P8-NEXT:    ld r0, 16(r1)
1049; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1050; CHECK-P8-NEXT:    mtlr r0
1051; CHECK-P8-NEXT:    blr
1052entry:
1053  %0 = load fp128, fp128* %a, align 16
1054  %1 = tail call i64 @llvm.llround.f128(fp128 %0)
1055  store i64 %1, i64* %res, align 16
1056  ret void
1057}
1058declare i64 @llvm.llround.f128(fp128 %Val)
1059
; llvm.rint.f128: on pwr9 this lowers to a single inline xsrqpix (operands 0, 3).
; NOTE(review): the 'x' form presumably signals inexact, which is what separates
; rint from nearbyint (qpNearByInt below checks plain xsrqpi with the same
; operands) — confirm against the Power ISA 3.0 description of xsrqpi[x].
; On pwr8 with -enable-soft-fp128 this is a rintf128 libcall.
1060define dso_local void @qpRint(fp128* nocapture readonly %a, fp128* nocapture %res) {
1061; CHECK-LABEL: qpRint:
1062; CHECK:       # %bb.0: # %entry
1063; CHECK-NEXT:    lxv v2, 0(r3)
1064; CHECK-NEXT:    xsrqpix 0, v2, v2, 3
1065; CHECK-NEXT:    stxv v2, 0(r4)
1066; CHECK-NEXT:    blr
1067;
1068; CHECK-P8-LABEL: qpRint:
1069; CHECK-P8:       # %bb.0: # %entry
1070; CHECK-P8-NEXT:    mflr r0
1071; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1072; CHECK-P8-NEXT:    .cfi_offset lr, 16
1073; CHECK-P8-NEXT:    .cfi_offset r30, -16
1074; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1075; CHECK-P8-NEXT:    std r0, 16(r1)
1076; CHECK-P8-NEXT:    stdu r1, -48(r1)
1077; CHECK-P8-NEXT:    lvx v2, 0, r3
1078; CHECK-P8-NEXT:    mr r30, r4
1079; CHECK-P8-NEXT:    bl rintf128
1080; CHECK-P8-NEXT:    nop
1081; CHECK-P8-NEXT:    stvx v2, 0, r30
1082; CHECK-P8-NEXT:    addi r1, r1, 48
1083; CHECK-P8-NEXT:    ld r0, 16(r1)
1084; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1085; CHECK-P8-NEXT:    mtlr r0
1086; CHECK-P8-NEXT:    blr
1087entry:
1088  %0 = load fp128, fp128* %a, align 16
1089  %1 = tail call fp128 @llvm.rint.f128(fp128 %0)
1090  store fp128 %1, fp128* %res, align 16
1091  ret void
1092}
1093declare fp128 @llvm.rint.f128(fp128 %Val)
1094
; llvm.lrint.f128: no inline lowering on either subtarget — both pwr9 and pwr8
; emit an lrintf128 libcall; the i32 result in r3 is stored with stw through r30.
1095define dso_local void @qpLRint(fp128* nocapture readonly %a, i32* nocapture %res) {
1096; CHECK-LABEL: qpLRint:
1097; CHECK:       # %bb.0: # %entry
1098; CHECK-NEXT:    mflr r0
1099; CHECK-NEXT:    .cfi_def_cfa_offset 48
1100; CHECK-NEXT:    .cfi_offset lr, 16
1101; CHECK-NEXT:    .cfi_offset r30, -16
1102; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1103; CHECK-NEXT:    std r0, 16(r1)
1104; CHECK-NEXT:    stdu r1, -48(r1)
1105; CHECK-NEXT:    lxv v2, 0(r3)
1106; CHECK-NEXT:    mr r30, r4
1107; CHECK-NEXT:    bl lrintf128
1108; CHECK-NEXT:    nop
1109; CHECK-NEXT:    stw r3, 0(r30)
1110; CHECK-NEXT:    addi r1, r1, 48
1111; CHECK-NEXT:    ld r0, 16(r1)
1112; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1113; CHECK-NEXT:    mtlr r0
1114; CHECK-NEXT:    blr
1115;
1116; CHECK-P8-LABEL: qpLRint:
1117; CHECK-P8:       # %bb.0: # %entry
1118; CHECK-P8-NEXT:    mflr r0
1119; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1120; CHECK-P8-NEXT:    .cfi_offset lr, 16
1121; CHECK-P8-NEXT:    .cfi_offset r30, -16
1122; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1123; CHECK-P8-NEXT:    std r0, 16(r1)
1124; CHECK-P8-NEXT:    stdu r1, -48(r1)
1125; CHECK-P8-NEXT:    lvx v2, 0, r3
1126; CHECK-P8-NEXT:    mr r30, r4
1127; CHECK-P8-NEXT:    bl lrintf128
1128; CHECK-P8-NEXT:    nop
1129; CHECK-P8-NEXT:    stw r3, 0(r30)
1130; CHECK-P8-NEXT:    addi r1, r1, 48
1131; CHECK-P8-NEXT:    ld r0, 16(r1)
1132; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1133; CHECK-P8-NEXT:    mtlr r0
1134; CHECK-P8-NEXT:    blr
1135entry:
1136  %0 = load fp128, fp128* %a, align 16
1137  %1 = tail call i32 @llvm.lrint.f128(fp128 %0)
1138  store i32 %1, i32* %res, align 16
1139  ret void
1140}
1141declare i32 @llvm.lrint.f128(fp128 %Val)
1142
; llvm.llrint.f128: no inline lowering on either subtarget — both pwr9 and pwr8
; emit an llrintf128 libcall; the i64 result in r3 is stored with std through r30.
1143define dso_local void @qpLLRint(fp128* nocapture readonly %a, i64* nocapture %res) {
1144; CHECK-LABEL: qpLLRint:
1145; CHECK:       # %bb.0: # %entry
1146; CHECK-NEXT:    mflr r0
1147; CHECK-NEXT:    .cfi_def_cfa_offset 48
1148; CHECK-NEXT:    .cfi_offset lr, 16
1149; CHECK-NEXT:    .cfi_offset r30, -16
1150; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1151; CHECK-NEXT:    std r0, 16(r1)
1152; CHECK-NEXT:    stdu r1, -48(r1)
1153; CHECK-NEXT:    lxv v2, 0(r3)
1154; CHECK-NEXT:    mr r30, r4
1155; CHECK-NEXT:    bl llrintf128
1156; CHECK-NEXT:    nop
1157; CHECK-NEXT:    std r3, 0(r30)
1158; CHECK-NEXT:    addi r1, r1, 48
1159; CHECK-NEXT:    ld r0, 16(r1)
1160; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1161; CHECK-NEXT:    mtlr r0
1162; CHECK-NEXT:    blr
1163;
1164; CHECK-P8-LABEL: qpLLRint:
1165; CHECK-P8:       # %bb.0: # %entry
1166; CHECK-P8-NEXT:    mflr r0
1167; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1168; CHECK-P8-NEXT:    .cfi_offset lr, 16
1169; CHECK-P8-NEXT:    .cfi_offset r30, -16
1170; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1171; CHECK-P8-NEXT:    std r0, 16(r1)
1172; CHECK-P8-NEXT:    stdu r1, -48(r1)
1173; CHECK-P8-NEXT:    lvx v2, 0, r3
1174; CHECK-P8-NEXT:    mr r30, r4
1175; CHECK-P8-NEXT:    bl llrintf128
1176; CHECK-P8-NEXT:    nop
1177; CHECK-P8-NEXT:    std r3, 0(r30)
1178; CHECK-P8-NEXT:    addi r1, r1, 48
1179; CHECK-P8-NEXT:    ld r0, 16(r1)
1180; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1181; CHECK-P8-NEXT:    mtlr r0
1182; CHECK-P8-NEXT:    blr
1183entry:
1184  %0 = load fp128, fp128* %a, align 16
1185  %1 = tail call i64 @llvm.llrint.f128(fp128 %0)
1186  store i64 %1, i64* %res, align 16
1187  ret void
1188}
1189declare i64 @llvm.llrint.f128(fp128 %Val)
1190
; llvm.nearbyint.f128: on pwr9 this lowers to a single inline xsrqpi (operands
; 0, 3) — the non-signalling counterpart of the xsrqpix checked in qpRint.
; On pwr8 with -enable-soft-fp128 it becomes a nearbyintf128 libcall.
1191define dso_local void @qpNearByInt(fp128* nocapture readonly %a, fp128* nocapture %res) {
1192; CHECK-LABEL: qpNearByInt:
1193; CHECK:       # %bb.0: # %entry
1194; CHECK-NEXT:    lxv v2, 0(r3)
1195; CHECK-NEXT:    xsrqpi 0, v2, v2, 3
1196; CHECK-NEXT:    stxv v2, 0(r4)
1197; CHECK-NEXT:    blr
1198;
1199; CHECK-P8-LABEL: qpNearByInt:
1200; CHECK-P8:       # %bb.0: # %entry
1201; CHECK-P8-NEXT:    mflr r0
1202; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1203; CHECK-P8-NEXT:    .cfi_offset lr, 16
1204; CHECK-P8-NEXT:    .cfi_offset r30, -16
1205; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1206; CHECK-P8-NEXT:    std r0, 16(r1)
1207; CHECK-P8-NEXT:    stdu r1, -48(r1)
1208; CHECK-P8-NEXT:    lvx v2, 0, r3
1209; CHECK-P8-NEXT:    mr r30, r4
1210; CHECK-P8-NEXT:    bl nearbyintf128
1211; CHECK-P8-NEXT:    nop
1212; CHECK-P8-NEXT:    stvx v2, 0, r30
1213; CHECK-P8-NEXT:    addi r1, r1, 48
1214; CHECK-P8-NEXT:    ld r0, 16(r1)
1215; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1216; CHECK-P8-NEXT:    mtlr r0
1217; CHECK-P8-NEXT:    blr
1218entry:
1219  %0 = load fp128, fp128* %a, align 16
1220  %1 = tail call fp128 @llvm.nearbyint.f128(fp128 %0)
1221  store fp128 %1, fp128* %res, align 16
1222  ret void
1223}
1224declare fp128 @llvm.nearbyint.f128(fp128 %Val)
1225
; llvm.fma.f128: on pwr9 this lowers to a single inline xsmaddqp with the three
; operands loaded into v2/v3/v4; on pwr8 with -enable-soft-fp128 it becomes an
; fmaf128 libcall, with the result in v2 stored through r30 (the saved %res).
1226define dso_local void @qpFMA(fp128* %a, fp128* %b, fp128* %c, fp128* %res) {
1227; CHECK-LABEL: qpFMA:
1228; CHECK:       # %bb.0: # %entry
1229; CHECK-NEXT:    lxv v2, 0(r3)
1230; CHECK-NEXT:    lxv v3, 0(r4)
1231; CHECK-NEXT:    lxv v4, 0(r5)
1232; CHECK-NEXT:    xsmaddqp v4, v2, v3
1233; CHECK-NEXT:    stxv v4, 0(r6)
1234; CHECK-NEXT:    blr
1235;
1236; CHECK-P8-LABEL: qpFMA:
1237; CHECK-P8:       # %bb.0: # %entry
1238; CHECK-P8-NEXT:    mflr r0
1239; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
1240; CHECK-P8-NEXT:    .cfi_offset lr, 16
1241; CHECK-P8-NEXT:    .cfi_offset r30, -16
1242; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
1243; CHECK-P8-NEXT:    std r0, 16(r1)
1244; CHECK-P8-NEXT:    stdu r1, -48(r1)
1245; CHECK-P8-NEXT:    lvx v2, 0, r3
1246; CHECK-P8-NEXT:    lvx v3, 0, r4
1247; CHECK-P8-NEXT:    lvx v4, 0, r5
1248; CHECK-P8-NEXT:    mr r30, r6
1249; CHECK-P8-NEXT:    bl fmaf128
1250; CHECK-P8-NEXT:    nop
1251; CHECK-P8-NEXT:    stvx v2, 0, r30
1252; CHECK-P8-NEXT:    addi r1, r1, 48
1253; CHECK-P8-NEXT:    ld r0, 16(r1)
1254; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
1255; CHECK-P8-NEXT:    mtlr r0
1256; CHECK-P8-NEXT:    blr
1257entry:
1258  %0 = load fp128, fp128* %a, align 16
1259  %1 = load fp128, fp128* %b, align 16
1260  %2 = load fp128, fp128* %c, align 16
1261  %3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %1, fp128 %2)
1262  store fp128 %3, fp128* %res, align 16
1263  ret void
1264}
1265declare fp128 @llvm.fma.f128(fp128, fp128, fp128)
1266