1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; REQUIRES: asserts
3; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1                        | FileCheck %s --check-prefix=FMFDEBUG
4; RUN: llc < %s -mtriple=powerpc64le                                                           | FileCheck %s --check-prefix=FMF
5; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
6; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math | FileCheck %s --check-prefix=GLOBAL
7
8; Test FP transforms using instruction/node-level fast-math-flags.
9; We're also checking debug output to verify that FMF is propagated to the newly created nodes.
10; The run with the global unsafe param tests the pre-FMF behavior using regular instructions/nodes.
11
12declare float @llvm.fma.f32(float, float, float)
13declare float @llvm.sqrt.f32(float)
14
15; X * Y + Z --> fma(X, Y, Z)
16
17; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
18; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
19; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'
20
; fmul_fadd_contract1: an unflagged fmul feeds an fadd that carries 'contract'.
; Both the FMF and GLOBAL assembly checks expect the pair to be emitted as a
; single xsmaddasp rather than a separate multiply and add.
21define float @fmul_fadd_contract1(float %x, float %y, float %z) {
22; FMF-LABEL: fmul_fadd_contract1:
23; FMF:       # %bb.0:
24; FMF-NEXT:    xsmaddasp 3, 1, 2
25; FMF-NEXT:    fmr 1, 3
26; FMF-NEXT:    blr
27;
28; GLOBAL-LABEL: fmul_fadd_contract1:
29; GLOBAL:       # %bb.0:
30; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
31; GLOBAL-NEXT:    fmr 1, 3
32; GLOBAL-NEXT:    blr
33  %mul = fmul float %x, %y
34  %add = fadd contract float %mul, %z
35  ret float %add
36}
37
38; This shouldn't change anything - the intermediate fmul result is now also flagged.
39
40; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
41; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
42; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'
43
; fmul_fadd_contract2: like fmul_fadd_contract1, except that the fmul carries
; 'contract' as well as the fadd. The expected assembly is the same single
; xsmaddasp for both the FMF and GLOBAL checks.
44define float @fmul_fadd_contract2(float %x, float %y, float %z) {
45; FMF-LABEL: fmul_fadd_contract2:
46; FMF:       # %bb.0:
47; FMF-NEXT:    xsmaddasp 3, 1, 2
48; FMF-NEXT:    fmr 1, 3
49; FMF-NEXT:    blr
50;
51; GLOBAL-LABEL: fmul_fadd_contract2:
52; GLOBAL:       # %bb.0:
53; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
54; GLOBAL-NEXT:    fmr 1, 3
55; GLOBAL-NEXT:    blr
56  %mul = fmul contract float %x, %y
57  %add = fadd contract float %mul, %z
58  ret float %add
59}
60
61; Reassociation implies that FMA contraction is allowed.
62
63; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
64; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
65; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'
66
; fmul_fadd_reassoc1: an unflagged fmul feeds an fadd that carries 'reassoc'
; (no 'contract' anywhere). Both the FMF and GLOBAL assembly checks still
; expect a single xsmaddasp.
67define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
68; FMF-LABEL: fmul_fadd_reassoc1:
69; FMF:       # %bb.0:
70; FMF-NEXT:    xsmaddasp 3, 1, 2
71; FMF-NEXT:    fmr 1, 3
72; FMF-NEXT:    blr
73;
74; GLOBAL-LABEL: fmul_fadd_reassoc1:
75; GLOBAL:       # %bb.0:
76; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
77; GLOBAL-NEXT:    fmr 1, 3
78; GLOBAL-NEXT:    blr
79  %mul = fmul float %x, %y
80  %add = fadd reassoc float %mul, %z
81  ret float %add
82}
83
84; This shouldn't change anything - the intermediate fmul result is now also flagged.
85
86; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
87; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
88; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'
89
; fmul_fadd_reassoc2: both the fmul and the fadd carry 'reassoc'. The debug
; check above matches all three operands of the resulting fma node, the same
; way the other fmul+fadd tests in this file do. Both the FMF and GLOBAL
; assembly checks expect a single xsmaddasp.
90define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
91; FMF-LABEL: fmul_fadd_reassoc2:
92; FMF:       # %bb.0:
93; FMF-NEXT:    xsmaddasp 3, 1, 2
94; FMF-NEXT:    fmr 1, 3
95; FMF-NEXT:    blr
96;
97; GLOBAL-LABEL: fmul_fadd_reassoc2:
98; GLOBAL:       # %bb.0:
99; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
100; GLOBAL-NEXT:    fmr 1, 3
101; GLOBAL-NEXT:    blr
102  %mul = fmul reassoc float %x, %y
103  %add = fadd reassoc float %mul, %z
104  ret float %add
105}
106
107; Despite the 'fast' in the test name, the fmul and fadd below carry only 'reassoc'; as in the reassoc tests, that is enough for contraction.
108
109; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
110; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
111; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'
112
; fmul_fadd_fast1: both the fmul and the fadd carry 'reassoc'. Both the FMF and
; GLOBAL assembly checks expect a single xsmaddasp.
; NOTE(review): no 'fast' flag appears in this body, and the IR is identical
; to fmul_fadd_reassoc2 and fmul_fadd_fast2 - confirm this duplication is
; intended.
113define float @fmul_fadd_fast1(float %x, float %y, float %z) {
114; FMF-LABEL: fmul_fadd_fast1:
115; FMF:       # %bb.0:
116; FMF-NEXT:    xsmaddasp 3, 1, 2
117; FMF-NEXT:    fmr 1, 3
118; FMF-NEXT:    blr
119;
120; GLOBAL-LABEL: fmul_fadd_fast1:
121; GLOBAL:       # %bb.0:
122; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
123; GLOBAL-NEXT:    fmr 1, 3
124; GLOBAL-NEXT:    blr
125  %mul = fmul reassoc float %x, %y
126  %add = fadd reassoc float %mul, %z
127  ret float %add
128}
129
130; This shouldn't change anything - the IR is the same as in the previous test, where the intermediate fmul was already flagged.
131
132; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
133; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
134; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'
135
; fmul_fadd_fast2: both the fmul and the fadd carry 'reassoc'. Both the FMF and
; GLOBAL assembly checks expect a single xsmaddasp.
; NOTE(review): the IR body is identical to fmul_fadd_fast1 (and to
; fmul_fadd_reassoc2); no 'fast' flag is present - confirm intent.
136define float @fmul_fadd_fast2(float %x, float %y, float %z) {
137; FMF-LABEL: fmul_fadd_fast2:
138; FMF:       # %bb.0:
139; FMF-NEXT:    xsmaddasp 3, 1, 2
140; FMF-NEXT:    fmr 1, 3
141; FMF-NEXT:    blr
142;
143; GLOBAL-LABEL: fmul_fadd_fast2:
144; GLOBAL:       # %bb.0:
145; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
146; GLOBAL-NEXT:    fmr 1, 3
147; GLOBAL-NEXT:    blr
148  %mul = fmul reassoc float %x, %y
149  %add = fadd reassoc float %mul, %z
150  ret float %add
151}
152
153; fma(X, 7.0, X * 42.0) --> X * 49.0
154; This is the minimum FMF needed for this transform - the FMA allows reassociation.
155
156; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
157; FMFDEBUG:         fmul reassoc {{t[0-9]+}},
158; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
159
160; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
161; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
162; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'
163
; fmul_fma_reassoc1: computes fma(%x, 7.0, %x * 42.0) where only the fma call
; carries 'reassoc' (the fmul is unflagged). Both the FMF and GLOBAL assembly
; checks expect the expression to collapse to one lfs from .LCPI6_0 followed
; by a single xsmulsp.
164define float @fmul_fma_reassoc1(float %x) {
165; FMF-LABEL: fmul_fma_reassoc1:
166; FMF:       # %bb.0:
167; FMF-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
168; FMF-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
169; FMF-NEXT:    xsmulsp 1, 1, 0
170; FMF-NEXT:    blr
171;
172; GLOBAL-LABEL: fmul_fma_reassoc1:
173; GLOBAL:       # %bb.0:
174; GLOBAL-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
175; GLOBAL-NEXT:    lfs 0, .LCPI6_0@toc@l(3)
176; GLOBAL-NEXT:    xsmulsp 1, 1, 0
177; GLOBAL-NEXT:    blr
178  %mul = fmul float %x, 42.0
179  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
180  ret float %fma
181}
182
183; This shouldn't change anything - the intermediate fmul result is now also flagged.
184
185; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
186; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
187; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
188
189; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
190; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
191; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'
192
; fmul_fma_reassoc2: like fmul_fma_reassoc1, except that the fmul carries
; 'reassoc' as well as the fma call. The expected assembly is the same shape:
; one lfs from .LCPI7_0 followed by a single xsmulsp.
193define float @fmul_fma_reassoc2(float %x) {
194; FMF-LABEL: fmul_fma_reassoc2:
195; FMF:       # %bb.0:
196; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
197; FMF-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
198; FMF-NEXT:    xsmulsp 1, 1, 0
199; FMF-NEXT:    blr
200;
201; GLOBAL-LABEL: fmul_fma_reassoc2:
202; GLOBAL:       # %bb.0:
203; GLOBAL-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
204; GLOBAL-NEXT:    lfs 0, .LCPI7_0@toc@l(3)
205; GLOBAL-NEXT:    xsmulsp 1, 1, 0
206; GLOBAL-NEXT:    blr
207  %mul = fmul reassoc float %x, 42.0
208  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
209  ret float %fma
210}
211
212; Despite the 'fast' in the test name, the FMA call below carries only 'reassoc', which is the flag this transform needs.
213
214; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
215; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
216; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
217
218; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
219; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
220; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'
221
; fmul_fma_fast1: fma(%x, 7.0, %x * 42.0) with 'reassoc' on the fma call and
; no flags on the fmul. Both the FMF and GLOBAL assembly checks expect one lfs
; from .LCPI8_0 followed by a single xsmulsp.
; NOTE(review): no 'fast' flag appears in this body, and the IR is identical
; to fmul_fma_reassoc1 - confirm this duplication is intended.
222define float @fmul_fma_fast1(float %x) {
223; FMF-LABEL: fmul_fma_fast1:
224; FMF:       # %bb.0:
225; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
226; FMF-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
227; FMF-NEXT:    xsmulsp 1, 1, 0
228; FMF-NEXT:    blr
229;
230; GLOBAL-LABEL: fmul_fma_fast1:
231; GLOBAL:       # %bb.0:
232; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
233; GLOBAL-NEXT:    lfs 0, .LCPI8_0@toc@l(3)
234; GLOBAL-NEXT:    xsmulsp 1, 1, 0
235; GLOBAL-NEXT:    blr
236  %mul = fmul float %x, 42.0
237  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
238  ret float %fma
239}
240
241; This shouldn't change anything - the intermediate fmul result is now also flagged.
242
243; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
244; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
245; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
246
247; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
248; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
249; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'
250
; fmul_fma_fast2: fma(%x, 7.0, %x * 42.0) with 'reassoc' on both the fmul and
; the fma call. Both the FMF and GLOBAL assembly checks expect one lfs from
; .LCPI9_0 followed by a single xsmulsp.
; NOTE(review): no 'fast' flag appears in this body, and the IR is identical
; to fmul_fma_reassoc2 - confirm this duplication is intended.
251define float @fmul_fma_fast2(float %x) {
252; FMF-LABEL: fmul_fma_fast2:
253; FMF:       # %bb.0:
254; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
255; FMF-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
256; FMF-NEXT:    xsmulsp 1, 1, 0
257; FMF-NEXT:    blr
258;
259; GLOBAL-LABEL: fmul_fma_fast2:
260; GLOBAL:       # %bb.0:
261; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
262; GLOBAL-NEXT:    lfs 0, .LCPI9_0@toc@l(3)
263; GLOBAL-NEXT:    xsmulsp 1, 1, 0
264; GLOBAL-NEXT:    blr
265  %mul = fmul reassoc float %x, 42.0
266  %fma = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %mul)
267  ret float %fma
268}
269
270; Reduced precision for sqrt is allowed - should use estimate and NR iterations.
271
272; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
273; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
274; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
275
276; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_ieee:'
277; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
278; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_ieee:'
279
; sqrt_afn_ieee: llvm.sqrt.f32 called with 'afn ninf' under attribute group #0.
; Both the FMF and GLOBAL assembly checks expect an xsrsqrtesp estimate refined
; by multiplies instead of a plain xssqrtsp. The estimate is guarded: the
; absolute value of the input (xsabsdp) is compared against a constant loaded
; from .LCPI10_2, and the blt path skips the estimate and returns the register
; that xxlxor cleared.
; The two check sets differ only in the refinement step: the FMF checks keep a
; separate xsmulsp/xsaddsp, while the GLOBAL checks use xsmaddasp.
280define float @sqrt_afn_ieee(float %x) #0 {
281; FMF-LABEL: sqrt_afn_ieee:
282; FMF:       # %bb.0:
283; FMF-NEXT:    xsabsdp 0, 1
284; FMF-NEXT:    addis 3, 2, .LCPI10_2@toc@ha
285; FMF-NEXT:    lfs 2, .LCPI10_2@toc@l(3)
286; FMF-NEXT:    fcmpu 0, 0, 2
287; FMF-NEXT:    xxlxor 0, 0, 0
288; FMF-NEXT:    blt 0, .LBB10_2
289; FMF-NEXT:  # %bb.1:
290; FMF-NEXT:    xsrsqrtesp 0, 1
291; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
292; FMF-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
293; FMF-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
294; FMF-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
295; FMF-NEXT:    xsmulsp 1, 1, 0
296; FMF-NEXT:    xsmulsp 0, 1, 0
297; FMF-NEXT:    xsmulsp 1, 1, 2
298; FMF-NEXT:    xsaddsp 0, 0, 3
299; FMF-NEXT:    xsmulsp 0, 1, 0
300; FMF-NEXT:  .LBB10_2:
301; FMF-NEXT:    fmr 1, 0
302; FMF-NEXT:    blr
303;
304; GLOBAL-LABEL: sqrt_afn_ieee:
305; GLOBAL:       # %bb.0:
306; GLOBAL-NEXT:    xsabsdp 0, 1
307; GLOBAL-NEXT:    addis 3, 2, .LCPI10_2@toc@ha
308; GLOBAL-NEXT:    lfs 2, .LCPI10_2@toc@l(3)
309; GLOBAL-NEXT:    fcmpu 0, 0, 2
310; GLOBAL-NEXT:    xxlxor 0, 0, 0
311; GLOBAL-NEXT:    blt 0, .LBB10_2
312; GLOBAL-NEXT:  # %bb.1:
313; GLOBAL-NEXT:    xsrsqrtesp 0, 1
314; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
315; GLOBAL-NEXT:    addis 4, 2, .LCPI10_1@toc@ha
316; GLOBAL-NEXT:    lfs 2, .LCPI10_0@toc@l(3)
317; GLOBAL-NEXT:    lfs 3, .LCPI10_1@toc@l(4)
318; GLOBAL-NEXT:    xsmulsp 1, 1, 0
319; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
320; GLOBAL-NEXT:    xsmulsp 0, 1, 3
321; GLOBAL-NEXT:    xsmulsp 0, 0, 2
322; GLOBAL-NEXT:  .LBB10_2:
323; GLOBAL-NEXT:    fmr 1, 0
324; GLOBAL-NEXT:    blr
325  %rt = call afn ninf float @llvm.sqrt.f32(float %x)
326  ret float %rt
327}
328
; sqrt_afn_ieee_inf: the same call as sqrt_afn_ieee but with 'afn' only (no
; 'ninf'), still under attribute group #0. Both the FMF and GLOBAL assembly
; checks expect a plain xssqrtsp, i.e. no estimate sequence is emitted.
329define float @sqrt_afn_ieee_inf(float %x) #0 {
330; FMF-LABEL: sqrt_afn_ieee_inf:
331; FMF:       # %bb.0:
332; FMF-NEXT:    xssqrtsp 1, 1
333; FMF-NEXT:    blr
334;
335; GLOBAL-LABEL: sqrt_afn_ieee_inf:
336; GLOBAL:       # %bb.0:
337; GLOBAL-NEXT:    xssqrtsp 1, 1
338; GLOBAL-NEXT:    blr
339  %rt = call afn float @llvm.sqrt.f32(float %x)
340  ret float %rt
341}
342
343; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
344; FMFDEBUG:         fmul ninf afn {{t[0-9]+}}
345; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
346
347; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
348; GLOBALDEBUG:         fmul ninf afn {{t[0-9]+}}
349; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn_preserve_sign:'
350
; sqrt_afn_preserve_sign: llvm.sqrt.f32 called with 'afn ninf' under attribute
; group #1. Both the FMF and GLOBAL assembly checks expect an xsrsqrtesp
; estimate refined by multiplies. Here the guard compares the input directly
; against a register cleared by xxlxor (fcmpu + beq), and the beq path skips
; the estimate and returns that cleared register.
; The two check sets differ only in the refinement step: the FMF checks keep a
; separate xsmulsp/xsaddsp, while the GLOBAL checks use xsmaddasp.
351define float @sqrt_afn_preserve_sign(float %x) #1 {
352; FMF-LABEL: sqrt_afn_preserve_sign:
353; FMF:       # %bb.0:
354; FMF-NEXT:    xxlxor 0, 0, 0
355; FMF-NEXT:    fcmpu 0, 1, 0
356; FMF-NEXT:    beq 0, .LBB12_2
357; FMF-NEXT:  # %bb.1:
358; FMF-NEXT:    xsrsqrtesp 0, 1
359; FMF-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
360; FMF-NEXT:    addis 4, 2, .LCPI12_1@toc@ha
361; FMF-NEXT:    lfs 2, .LCPI12_0@toc@l(3)
362; FMF-NEXT:    lfs 3, .LCPI12_1@toc@l(4)
363; FMF-NEXT:    xsmulsp 1, 1, 0
364; FMF-NEXT:    xsmulsp 0, 1, 0
365; FMF-NEXT:    xsmulsp 1, 1, 2
366; FMF-NEXT:    xsaddsp 0, 0, 3
367; FMF-NEXT:    xsmulsp 0, 1, 0
368; FMF-NEXT:  .LBB12_2:
369; FMF-NEXT:    fmr 1, 0
370; FMF-NEXT:    blr
371;
372; GLOBAL-LABEL: sqrt_afn_preserve_sign:
373; GLOBAL:       # %bb.0:
374; GLOBAL-NEXT:    xxlxor 0, 0, 0
375; GLOBAL-NEXT:    fcmpu 0, 1, 0
376; GLOBAL-NEXT:    beq 0, .LBB12_2
377; GLOBAL-NEXT:  # %bb.1:
378; GLOBAL-NEXT:    xsrsqrtesp 0, 1
379; GLOBAL-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
380; GLOBAL-NEXT:    addis 4, 2, .LCPI12_1@toc@ha
381; GLOBAL-NEXT:    lfs 2, .LCPI12_0@toc@l(3)
382; GLOBAL-NEXT:    lfs 3, .LCPI12_1@toc@l(4)
383; GLOBAL-NEXT:    xsmulsp 1, 1, 0
384; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
385; GLOBAL-NEXT:    xsmulsp 0, 1, 3
386; GLOBAL-NEXT:    xsmulsp 0, 0, 2
387; GLOBAL-NEXT:  .LBB12_2:
388; GLOBAL-NEXT:    fmr 1, 0
389; GLOBAL-NEXT:    blr
390  %rt = call afn ninf float @llvm.sqrt.f32(float %x)
391  ret float %rt
392}
393
; sqrt_afn_preserve_sign_inf: the same call as sqrt_afn_preserve_sign but with
; 'afn' only (no 'ninf'), still under attribute group #1. Both the FMF and
; GLOBAL assembly checks expect a plain xssqrtsp with no estimate sequence.
394define float @sqrt_afn_preserve_sign_inf(float %x) #1 {
395; FMF-LABEL: sqrt_afn_preserve_sign_inf:
396; FMF:       # %bb.0:
397; FMF-NEXT:    xssqrtsp 1, 1
398; FMF-NEXT:    blr
399;
400; GLOBAL-LABEL: sqrt_afn_preserve_sign_inf:
401; GLOBAL:       # %bb.0:
402; GLOBAL-NEXT:    xssqrtsp 1, 1
403; GLOBAL-NEXT:    blr
404  %rt = call afn float @llvm.sqrt.f32(float %x)
405  ret float %rt
406}
407
408; Despite the 'fast' in the test name, the call below carries 'reassoc afn ninf' rather than the full 'fast' set; the estimate sequence is still expected.
409
410; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
411; FMFDEBUG:         fmul ninf afn reassoc {{t[0-9]+}}
412; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
413
414; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_ieee:'
415; GLOBALDEBUG:         fmul ninf afn reassoc {{t[0-9]+}}
416; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_ieee:'
417
; sqrt_fast_ieee: llvm.sqrt.f32 called with 'reassoc afn ninf' under attribute
; group #0. Both the FMF and GLOBAL assembly checks expect the same code: an
; xsabsdp/fcmpu/blt guard around an xsrsqrtesp estimate whose refinement uses
; xsmaddasp.
418define float @sqrt_fast_ieee(float %x) #0 {
419; FMF-LABEL: sqrt_fast_ieee:
420; FMF:       # %bb.0:
421; FMF-NEXT:    xsabsdp 0, 1
422; FMF-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
423; FMF-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
424; FMF-NEXT:    fcmpu 0, 0, 2
425; FMF-NEXT:    xxlxor 0, 0, 0
426; FMF-NEXT:    blt 0, .LBB14_2
427; FMF-NEXT:  # %bb.1:
428; FMF-NEXT:    xsrsqrtesp 0, 1
429; FMF-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
430; FMF-NEXT:    addis 4, 2, .LCPI14_1@toc@ha
431; FMF-NEXT:    lfs 2, .LCPI14_0@toc@l(3)
432; FMF-NEXT:    lfs 3, .LCPI14_1@toc@l(4)
433; FMF-NEXT:    xsmulsp 1, 1, 0
434; FMF-NEXT:    xsmaddasp 2, 1, 0
435; FMF-NEXT:    xsmulsp 0, 1, 3
436; FMF-NEXT:    xsmulsp 0, 0, 2
437; FMF-NEXT:  .LBB14_2:
438; FMF-NEXT:    fmr 1, 0
439; FMF-NEXT:    blr
440;
441; GLOBAL-LABEL: sqrt_fast_ieee:
442; GLOBAL:       # %bb.0:
443; GLOBAL-NEXT:    xsabsdp 0, 1
444; GLOBAL-NEXT:    addis 3, 2, .LCPI14_2@toc@ha
445; GLOBAL-NEXT:    lfs 2, .LCPI14_2@toc@l(3)
446; GLOBAL-NEXT:    fcmpu 0, 0, 2
447; GLOBAL-NEXT:    xxlxor 0, 0, 0
448; GLOBAL-NEXT:    blt 0, .LBB14_2
449; GLOBAL-NEXT:  # %bb.1:
450; GLOBAL-NEXT:    xsrsqrtesp 0, 1
451; GLOBAL-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
452; GLOBAL-NEXT:    addis 4, 2, .LCPI14_1@toc@ha
453; GLOBAL-NEXT:    lfs 2, .LCPI14_0@toc@l(3)
454; GLOBAL-NEXT:    lfs 3, .LCPI14_1@toc@l(4)
455; GLOBAL-NEXT:    xsmulsp 1, 1, 0
456; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
457; GLOBAL-NEXT:    xsmulsp 0, 1, 3
458; GLOBAL-NEXT:    xsmulsp 0, 0, 2
459; GLOBAL-NEXT:  .LBB14_2:
460; GLOBAL-NEXT:    fmr 1, 0
461; GLOBAL-NEXT:    blr
462  %rt = call reassoc afn ninf float @llvm.sqrt.f32(float %x)
463  ret float %rt
464}
465
466; Despite the 'fast' in the test name, the call below carries 'reassoc ninf afn' rather than the full 'fast' set; the estimate sequence is still expected.
467
468; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
469; FMFDEBUG:         fmul ninf afn reassoc {{t[0-9]+}}
470; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
471
472; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
473; GLOBALDEBUG:         fmul ninf afn reassoc {{t[0-9]+}}
474; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast_preserve_sign:'
475
; sqrt_fast_preserve_sign: llvm.sqrt.f32 called with 'reassoc ninf afn' under
; attribute group #1. Both the FMF and GLOBAL assembly checks expect the same
; code: an xxlxor/fcmpu/beq guard around an xsrsqrtesp estimate whose
; refinement uses xsmaddasp.
476define float @sqrt_fast_preserve_sign(float %x) #1 {
477; FMF-LABEL: sqrt_fast_preserve_sign:
478; FMF:       # %bb.0:
479; FMF-NEXT:    xxlxor 0, 0, 0
480; FMF-NEXT:    fcmpu 0, 1, 0
481; FMF-NEXT:    beq 0, .LBB15_2
482; FMF-NEXT:  # %bb.1:
483; FMF-NEXT:    xsrsqrtesp 0, 1
484; FMF-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
485; FMF-NEXT:    addis 4, 2, .LCPI15_1@toc@ha
486; FMF-NEXT:    lfs 2, .LCPI15_0@toc@l(3)
487; FMF-NEXT:    lfs 3, .LCPI15_1@toc@l(4)
488; FMF-NEXT:    xsmulsp 1, 1, 0
489; FMF-NEXT:    xsmaddasp 2, 1, 0
490; FMF-NEXT:    xsmulsp 0, 1, 3
491; FMF-NEXT:    xsmulsp 0, 0, 2
492; FMF-NEXT:  .LBB15_2:
493; FMF-NEXT:    fmr 1, 0
494; FMF-NEXT:    blr
495;
496; GLOBAL-LABEL: sqrt_fast_preserve_sign:
497; GLOBAL:       # %bb.0:
498; GLOBAL-NEXT:    xxlxor 0, 0, 0
499; GLOBAL-NEXT:    fcmpu 0, 1, 0
500; GLOBAL-NEXT:    beq 0, .LBB15_2
501; GLOBAL-NEXT:  # %bb.1:
502; GLOBAL-NEXT:    xsrsqrtesp 0, 1
503; GLOBAL-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
504; GLOBAL-NEXT:    addis 4, 2, .LCPI15_1@toc@ha
505; GLOBAL-NEXT:    lfs 2, .LCPI15_0@toc@l(3)
506; GLOBAL-NEXT:    lfs 3, .LCPI15_1@toc@l(4)
507; GLOBAL-NEXT:    xsmulsp 1, 1, 0
508; GLOBAL-NEXT:    xsmaddasp 2, 1, 0
509; GLOBAL-NEXT:    xsmulsp 0, 1, 3
510; GLOBAL-NEXT:    xsmulsp 0, 0, 2
511; GLOBAL-NEXT:  .LBB15_2:
512; GLOBAL-NEXT:    fmr 1, 0
513; GLOBAL-NEXT:    blr
514  %rt = call reassoc ninf afn float @llvm.sqrt.f32(float %x)
515  ret float %rt
516}
517
518; fcmp can have fast-math-flags.
519
520; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
521; FMFDEBUG:         select_cc nnan {{t[0-9]+}}
522; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
523
524; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
525; GLOBALDEBUG:         select_cc nnan {{t[0-9]+}}
526; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'
527
; fcmp_nnan: compares %a against 0.0 with 'fcmp nnan ult' and selects %z when
; the comparison is true, %y otherwise. Both the FMF and GLOBAL assembly checks
; expect an xscmpudp against a register cleared by xxlxor, followed by a
; single blt that chooses between the two values.
528define double @fcmp_nnan(double %a, double %y, double %z) {
529; FMF-LABEL: fcmp_nnan:
530; FMF:       # %bb.0:
531; FMF-NEXT:    xxlxor 0, 0, 0
532; FMF-NEXT:    xscmpudp 0, 1, 0
533; FMF-NEXT:    blt 0, .LBB16_2
534; FMF-NEXT:  # %bb.1:
535; FMF-NEXT:    fmr 3, 2
536; FMF-NEXT:  .LBB16_2:
537; FMF-NEXT:    fmr 1, 3
538; FMF-NEXT:    blr
539;
540; GLOBAL-LABEL: fcmp_nnan:
541; GLOBAL:       # %bb.0:
542; GLOBAL-NEXT:    xxlxor 0, 0, 0
543; GLOBAL-NEXT:    xscmpudp 0, 1, 0
544; GLOBAL-NEXT:    blt 0, .LBB16_2
545; GLOBAL-NEXT:  # %bb.1:
546; GLOBAL-NEXT:    fmr 3, 2
547; GLOBAL-NEXT:  .LBB16_2:
548; GLOBAL-NEXT:    fmr 1, 3
549; GLOBAL-NEXT:    blr
550  %cmp = fcmp nnan ult double %a, 0.0
551  %z.y = select i1 %cmp, double %z, double %y
552  ret double %z.y
553}
554
555; FP library calls can have fast-math-flags.
556
557; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
558; FMFDEBUG:         ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
559; FMFDEBUG:         ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
560; FMFDEBUG:         f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
561; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'
562
563; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
564; GLOBALDEBUG:         ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
565; GLOBALDEBUG:         ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
566; GLOBALDEBUG:         f64,ch,glue = CopyFromReg t16, Register:f64 $f1, t16:1
567; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'
568
569declare double @log2(double)
; log2_approx: calls the external @log2 with 'afn' on the call instruction.
; Both the FMF and GLOBAL assembly checks expect an ordinary 'bl log2' wrapped
; in the link-register save/restore and stack adjustment shown below; the
; call is not replaced by an inline sequence.
570define double @log2_approx(double %x) nounwind {
571; FMF-LABEL: log2_approx:
572; FMF:       # %bb.0:
573; FMF-NEXT:    mflr 0
574; FMF-NEXT:    std 0, 16(1)
575; FMF-NEXT:    stdu 1, -32(1)
576; FMF-NEXT:    bl log2
577; FMF-NEXT:    nop
578; FMF-NEXT:    addi 1, 1, 32
579; FMF-NEXT:    ld 0, 16(1)
580; FMF-NEXT:    mtlr 0
581; FMF-NEXT:    blr
582;
583; GLOBAL-LABEL: log2_approx:
584; GLOBAL:       # %bb.0:
585; GLOBAL-NEXT:    mflr 0
586; GLOBAL-NEXT:    std 0, 16(1)
587; GLOBAL-NEXT:    stdu 1, -32(1)
588; GLOBAL-NEXT:    bl log2
589; GLOBAL-NEXT:    nop
590; GLOBAL-NEXT:    addi 1, 1, 32
591; GLOBAL-NEXT:    ld 0, 16(1)
592; GLOBAL-NEXT:    mtlr 0
593; GLOBAL-NEXT:    blr
594  %r = call afn double @log2(double %x)
595  ret double %r
596}
597
598; -(X - Y) --> (Y - X)
599
600; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
601; FMFDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
602; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
603
604; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
605; GLOBALDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
606; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
607
; fneg_fsub_nozeros_1: computes 0.0 - (%x - %y), with 'nsz' only on the outer
; fsub. Both the FMF and GLOBAL assembly checks expect this to become a single
; xssubsp with the operands swapped (2, 1), i.e. %y - %x.
; The %z parameter is unused, and the result value is named %add although it
; is produced by an fsub.
608define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
609; FMF-LABEL: fneg_fsub_nozeros_1:
610; FMF:       # %bb.0:
611; FMF-NEXT:    xssubsp 1, 2, 1
612; FMF-NEXT:    blr
613;
614; GLOBAL-LABEL: fneg_fsub_nozeros_1:
615; GLOBAL:       # %bb.0:
616; GLOBAL-NEXT:    xssubsp 1, 2, 1
617; GLOBAL-NEXT:    blr
618  %neg = fsub float %x, %y
619  %add = fsub nsz float 0.0, %neg
620  ret float %add
621}
622
; Attribute groups that set "denormal-fp-math" for the sqrt tests in this file:
; #0 selects "ieee,ieee" and #1 selects "preserve-sign,preserve-sign".
623attributes #0 = { "denormal-fp-math"="ieee,ieee" }
624attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
625