; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s

; Declarations of the llvm.sqrt intrinsic overloads called by the tests below:
; scalar float and double, plus the 2-, 4- and 8-lane float vectors and the
; 2- and 4-lane double vectors. All of them carry attribute group #0.
declare float @llvm.sqrt.f32(float) #0
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
declare double @llvm.sqrt.f64(double) #0
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0

; Scalar f32 sqrt carrying the `fast` flag.
; Under the FAULT prefix (use-reciprocal-square-root disabled) a single fsqrt
; is expected. Under the default prefix the expected code is an frsqrte
; estimate, two frsqrts steps, a multiply by the input, and an fcsel that
; returns the input unchanged when it compares equal to 0.0.
define float @fsqrt(float %a) #0 {
; FAULT-LABEL: fsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt s0, s0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: fsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte s1, s0
; CHECK-NEXT:    fcmp s0, #0.0
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    fmul s1, s1, s0
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s2, s1
; CHECK-NEXT:    fcsel s0, s0, s1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  ret float %1
}

; Scalar f32 sqrt under attribute group #1, which adds
; "denormal-fp-math"="ieee" to "unsafe-fp-math"="true".
; The expected code under both prefixes is the same as for @fsqrt: a plain
; fsqrt, or the frsqrte/frsqrts sequence guarded by a compare against 0.0.
define float @fsqrt_ieee_denorms(float %a) #1 {
; FAULT-LABEL: fsqrt_ieee_denorms:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt s0, s0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: fsqrt_ieee_denorms:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte s1, s0
; CHECK-NEXT:    fcmp s0, #0.0
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    fmul s1, s1, s0
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s2, s1
; CHECK-NEXT:    fcsel s0, s0, s1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  ret float %1
}

; <2 x float> sqrt carrying the `fast` flag.
; Under the FAULT prefix a single vector fsqrt is expected. Under the default
; prefix the expected code is frsqrte plus two frsqrts steps on .2s lanes; the
; zero-input lanes are handled with an fcmeq mask and a bif on the 8-byte
; register view instead of a scalar compare/select.
define <2 x float> @f2sqrt(<2 x float> %a) #0 {
; FAULT-LABEL: f2sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2s, v0.2s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f2sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2s, v0.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    fmul v1.2s, v1.2s, v0.2s
; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v1.2s, v2.2s, v1.2s
; CHECK-NEXT:    fcmeq v2.2s, v0.2s, #0.0
; CHECK-NEXT:    bif v0.8b, v1.8b, v2.8b
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  ret <2 x float> %1
}

; <4 x float> sqrt carrying the `fast` flag.
; Under the FAULT prefix a single vector fsqrt is expected. Under the default
; prefix the expected code is frsqrte plus two frsqrts steps on .4s lanes,
; finished by an fcmeq against 0.0 and a bif on the 16-byte register view.
define <4 x float> @f4sqrt(<4 x float> %a) #0 {
; FAULT-LABEL: f4sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f4sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.4s, v0.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    fmul v1.4s, v1.4s, v0.4s
; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v1.4s, v2.4s, v1.4s
; CHECK-NEXT:    fcmeq v2.4s, v0.4s, #0.0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %1
}

; <8 x float> sqrt carrying the `fast` flag.
; The expected code operates on two .4s registers (v0 and v1). Under the FAULT
; prefix that is two vector fsqrt instructions. Under the default prefix it is
; two interleaved copies of the frsqrte / two-frsqrts sequence, each finished
; by its own fcmeq against 0.0 and bif.
define <8 x float> @f8sqrt(<8 x float> %a) #0 {
; FAULT-LABEL: f8sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    fsqrt v1.4s, v1.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f8sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.4s, v0.4s
; CHECK-NEXT:    frsqrte v3.4s, v1.4s
; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrts v4.4s, v0.4s, v4.4s
; CHECK-NEXT:    fmul v5.4s, v3.4s, v3.4s
; CHECK-NEXT:    frsqrts v5.4s, v1.4s, v5.4s
; CHECK-NEXT:    fmul v2.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT:    fmul v2.4s, v2.4s, v0.4s
; CHECK-NEXT:    frsqrts v4.4s, v0.4s, v4.4s
; CHECK-NEXT:    fmul v3.4s, v3.4s, v5.4s
; CHECK-NEXT:    fmul v5.4s, v3.4s, v3.4s
; CHECK-NEXT:    fmul v3.4s, v3.4s, v1.4s
; CHECK-NEXT:    frsqrts v5.4s, v1.4s, v5.4s
; CHECK-NEXT:    fmul v2.4s, v4.4s, v2.4s
; CHECK-NEXT:    fcmeq v4.4s, v0.4s, #0.0
; CHECK-NEXT:    bif v0.16b, v2.16b, v4.16b
; CHECK-NEXT:    fmul v3.4s, v5.4s, v3.4s
; CHECK-NEXT:    fcmeq v5.4s, v1.4s, #0.0
; CHECK-NEXT:    bif v1.16b, v3.16b, v5.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  ret <8 x float> %1
}

; Scalar f64 sqrt carrying the `fast` flag.
; Under the FAULT prefix a single fsqrt is expected. Under the default prefix
; the expected code is an frsqrte estimate followed by three frsqrts steps
; (one more than the f32 tests in this file), a multiply by the input, and an
; fcsel that returns the input unchanged when it compares equal to 0.0.
define double @dsqrt(double %a) #0 {
; FAULT-LABEL: dsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: dsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fcmp d0, #0.0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    fmul d1, d1, d0
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d2, d1
; CHECK-NEXT:    fcsel d0, d0, d1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  ret double %1
}

; Scalar f64 sqrt under attribute group #1, which adds
; "denormal-fp-math"="ieee" to "unsafe-fp-math"="true".
; The expected code under both prefixes is the same as for @dsqrt: a plain
; fsqrt, or frsqrte plus three frsqrts steps guarded by a compare against 0.0.
define double @dsqrt_ieee_denorms(double %a) #1 {
; FAULT-LABEL: dsqrt_ieee_denorms:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: dsqrt_ieee_denorms:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fcmp d0, #0.0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    fmul d1, d1, d0
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d2, d1
; CHECK-NEXT:    fcsel d0, d0, d1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  ret double %1
}

; <2 x double> sqrt carrying the `fast` flag.
; Under the FAULT prefix a single vector fsqrt is expected. Under the default
; prefix the expected code is frsqrte plus three frsqrts steps on .2d lanes,
; finished by an fcmeq against 0.0 and a bif on the 16-byte register view.
define <2 x double> @d2sqrt(<2 x double> %a) #0 {
; FAULT-LABEL: d2sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d2sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2d, v0.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v0.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v2.2d, v1.2d
; CHECK-NEXT:    fcmeq v2.2d, v0.2d, #0.0
; CHECK-NEXT:    bif v0.16b, v1.16b, v2.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  ret <2 x double> %1
}

; <4 x double> sqrt carrying the `fast` flag.
; The expected code operates on two .2d registers (v0 and v1). Under the FAULT
; prefix that is two vector fsqrt instructions. Under the default prefix it is
; two interleaved copies of the frsqrte / three-frsqrts sequence, each
; finished by its own fcmeq against 0.0 and bif.
define <4 x double> @d4sqrt(<4 x double> %a) #0 {
; FAULT-LABEL: d4sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    fsqrt v1.2d, v1.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d4sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.2d, v0.2d
; CHECK-NEXT:    frsqrte v3.2d, v1.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v5.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v5.2d, v1.2d, v5.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v5.2d
; CHECK-NEXT:    fmul v5.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v5.2d, v1.2d, v5.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v0.2d
; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v5.2d
; CHECK-NEXT:    fmul v5.2d, v3.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v1.2d
; CHECK-NEXT:    frsqrts v5.2d, v1.2d, v5.2d
; CHECK-NEXT:    fmul v2.2d, v4.2d, v2.2d
; CHECK-NEXT:    fcmeq v4.2d, v0.2d, #0.0
; CHECK-NEXT:    bif v0.16b, v2.16b, v4.16b
; CHECK-NEXT:    fmul v3.2d, v5.2d, v3.2d
; CHECK-NEXT:    fcmeq v5.2d, v1.2d, #0.0
; CHECK-NEXT:    bif v1.16b, v3.16b, v5.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  ret <4 x double> %1
}

; Scalar f32 reciprocal square root: 1.0 / sqrt(a), both operations `fast`.
; Under the FAULT prefix the expected code is fsqrt, an fmov of 1.0 and an
; fdiv. Under the default prefix it is an frsqrte estimate and two frsqrts
; steps; unlike the plain sqrt tests there is no compare against 0.0.
define float @frsqrt(float %a) #0 {
; FAULT-LABEL: frsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt s0, s0
; FAULT-NEXT:    fmov s1, #1.00000000
; FAULT-NEXT:    fdiv s0, s1, s0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: frsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte s1, s0
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s0, s0, s2
; CHECK-NEXT:    fmul s0, s1, s0
; CHECK-NEXT:    ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  %2 = fdiv fast float 1.000000e+00, %1
  ret float %2
}

; <2 x float> reciprocal square root: splat(1.0) / sqrt(a), both `fast`.
; Under the FAULT prefix the expected code is a vector fsqrt, an fmov of 1.0
; and a vector fdiv. Under the default prefix it is frsqrte plus two frsqrts
; steps on .2s lanes, with no compare against 0.0.
define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
; FAULT-LABEL: f2rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2s, v0.2s
; FAULT-NEXT:    fmov v1.2s, #1.00000000
; FAULT-NEXT:    fdiv v0.2s, v1.2s, v0.2s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f2rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2s, v0.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v0.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v0.2s, v1.2s, v0.2s
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
  ret <2 x float> %2
}

; <4 x float> reciprocal square root: splat(1.0) / sqrt(a), both `fast`.
; Under the FAULT prefix the expected code is a vector fsqrt, an fmov of 1.0
; and a vector fdiv. Under the default prefix it is frsqrte plus two frsqrts
; steps on .4s lanes, with no compare against 0.0.
define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
; FAULT-LABEL: f4rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    fmov v1.4s, #1.00000000
; FAULT-NEXT:    fdiv v0.4s, v1.4s, v0.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f4rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.4s, v0.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <4 x float> %2
}

; <8 x float> reciprocal square root: splat(1.0) / sqrt(a), both `fast`.
; The expected code operates on two .4s registers (v0 and v1). Under the FAULT
; prefix that is two fsqrt and two fdiv instructions sharing one fmov of 1.0.
; Under the default prefix it is two interleaved copies of the frsqrte /
; two-frsqrts sequence.
define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
; FAULT-LABEL: f8rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    fmov v2.4s, #1.00000000
; FAULT-NEXT:    fsqrt v1.4s, v1.4s
; FAULT-NEXT:    fdiv v0.4s, v2.4s, v0.4s
; FAULT-NEXT:    fdiv v1.4s, v2.4s, v1.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f8rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.4s, v0.4s
; CHECK-NEXT:    frsqrte v3.4s, v1.4s
; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrts v4.4s, v0.4s, v4.4s
; CHECK-NEXT:    fmul v5.4s, v3.4s, v3.4s
; CHECK-NEXT:    frsqrts v5.4s, v1.4s, v5.4s
; CHECK-NEXT:    fmul v2.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrts v0.4s, v0.4s, v4.4s
; CHECK-NEXT:    fmul v3.4s, v3.4s, v5.4s
; CHECK-NEXT:    fmul v4.4s, v3.4s, v3.4s
; CHECK-NEXT:    frsqrts v1.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    fmul v1.4s, v3.4s, v1.4s
; CHECK-NEXT:    ret
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <8 x float> %2
}

; Scalar f64 reciprocal square root: 1.0 / sqrt(a), both operations `fast`.
; Under the FAULT prefix the expected code is fsqrt, an fmov of 1.0 and an
; fdiv. Under the default prefix it is an frsqrte estimate and three frsqrts
; steps, with no compare against 0.0.
define double @drsqrt(double %a) #0 {
; FAULT-LABEL: drsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    fmov d1, #1.00000000
; FAULT-NEXT:    fdiv d0, d1, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: drsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d0, d0, d2
; CHECK-NEXT:    fmul d0, d1, d0
; CHECK-NEXT:    ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  %2 = fdiv fast double 1.000000e+00, %1
  ret double %2
}

; <2 x double> reciprocal square root: splat(1.0) / sqrt(a), both `fast`.
; Under the FAULT prefix the expected code is a vector fsqrt, an fmov of 1.0
; and a vector fdiv. Under the default prefix it is frsqrte plus three frsqrts
; steps on .2d lanes, with no compare against 0.0.
define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
; FAULT-LABEL: d2rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    fmov v1.2d, #1.00000000
; FAULT-NEXT:    fdiv v0.2d, v1.2d, v0.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d2rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2d, v0.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v0.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v0.2d, v1.2d, v0.2d
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
  ret <2 x double> %2
}

; <4 x double> reciprocal square root: splat(1.0) / sqrt(a), both `fast`.
; The expected code operates on two .2d registers (v0 and v1). Under the FAULT
; prefix that is two fsqrt and two fdiv instructions sharing one fmov of 1.0.
; Under the default prefix it is two interleaved copies of the frsqrte /
; three-frsqrts sequence.
define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
; FAULT-LABEL: d4rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    fmov v2.2d, #1.00000000
; FAULT-NEXT:    fsqrt v1.2d, v1.2d
; FAULT-NEXT:    fdiv v0.2d, v2.2d, v0.2d
; FAULT-NEXT:    fdiv v1.2d, v2.2d, v1.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d4rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.2d, v0.2d
; CHECK-NEXT:    frsqrte v3.2d, v1.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v5.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v5.2d, v1.2d, v5.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v5.2d
; CHECK-NEXT:    fmul v5.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v5.2d, v1.2d, v5.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v0.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v5.2d
; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v1.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v0.2d, v2.2d, v0.2d
; CHECK-NEXT:    fmul v1.2d, v3.2d, v1.2d
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
  ret <4 x double> %2
}

; x / sqrt(x) where the sqrt call and the fdiv are both `fast` and the sqrt has
; no other use. The function is only `nounwind`; it does not use attribute
; group #0.
; Under the FAULT prefix the whole expression is expected to become a single
; fsqrt. Under the default prefix it is the frsqrte / three-frsqrts sequence
; multiplied by x, with no compare against 0.0.
define double @sqrt_fdiv_common_operand(double %x) nounwind {
; FAULT-LABEL: sqrt_fdiv_common_operand:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: sqrt_fdiv_common_operand:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    ret
  %sqrt = call fast double @llvm.sqrt.f64(double %x)
  %r = fdiv fast double %x, %sqrt
  ret double %r
}

; <2 x double> form of x / sqrt(x). Here the sqrt call carries no fast-math
; flags at all; only the fdiv has `arcp nsz reassoc`.
; Under the FAULT prefix a single vector fsqrt is expected. Under the default
; prefix it is the frsqrte / three-frsqrts sequence on .2d lanes multiplied
; by x.
define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind {
; FAULT-LABEL: sqrt_fdiv_common_operand_vec:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: sqrt_fdiv_common_operand_vec:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2d, v0.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v0.2d, v0.2d, v1.2d
; CHECK-NEXT:    ret
  %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
  %r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt
  ret <2 x double> %r
}

; x / sqrt(x) where the sqrt result has a second use: it is also stored to %p.
; Under the FAULT prefix one fsqrt is expected to feed both the store and the
; return value. Under the default prefix the frsqrte / three-frsqrts sequence
; times x is returned directly, while the stored value goes through the fcsel
; that substitutes x when it compares equal to 0.0.
define double @sqrt_fdiv_common_operand_extra_use(double %x, double* %p) nounwind {
; FAULT-LABEL: sqrt_fdiv_common_operand_extra_use:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    str d0, [x0]
; FAULT-NEXT:    ret
;
; CHECK-LABEL: sqrt_fdiv_common_operand_extra_use:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fcmp d0, #0.0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d1, d0, d1
; CHECK-NEXT:    fcsel d2, d0, d1, eq
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    str d2, [x0]
; CHECK-NEXT:    ret
  %sqrt = call fast double @llvm.sqrt.f64(double %x)
  store double %sqrt, double* %p
  %r = fdiv fast double %x, %sqrt
  ret double %r
}

; One `fast` sqrt(x) used as the divisor of three `fast` fdivs: 1.0/sqrt
; (stored to %p1), 42.0/sqrt (stored to %p2) and x/sqrt (returned).
; Under the FAULT prefix the return value is expected to be the fsqrt result
; itself, with two fdivs for the stored values. Under the default prefix a
; single frsqrte / three-frsqrts sequence is expected to feed all three
; results through multiplies, with no fdiv.
; The immediate 4631107791820423168 (0x4045000000000000) is the bit pattern
; of the double 42.0.
define double @sqrt_simplify_before_recip_3_uses(double %x, double* %p1, double* %p2) nounwind {
; FAULT-LABEL: sqrt_simplify_before_recip_3_uses:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    mov x8, #4631107791820423168
; FAULT-NEXT:    fmov d1, #1.00000000
; FAULT-NEXT:    fmov d2, x8
; FAULT-NEXT:    fdiv d1, d1, d0
; FAULT-NEXT:    fdiv d2, d2, d0
; FAULT-NEXT:    str d1, [x0]
; FAULT-NEXT:    str d2, [x1]
; FAULT-NEXT:    ret
;
; CHECK-LABEL: sqrt_simplify_before_recip_3_uses:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    mov x8, #4631107791820423168
; CHECK-NEXT:    fmov d2, x8
; CHECK-NEXT:    fmul d3, d1, d1
; CHECK-NEXT:    frsqrts d3, d0, d3
; CHECK-NEXT:    fmul d1, d1, d3
; CHECK-NEXT:    fmul d3, d1, d1
; CHECK-NEXT:    frsqrts d3, d0, d3
; CHECK-NEXT:    fmul d1, d1, d3
; CHECK-NEXT:    fmul d3, d1, d1
; CHECK-NEXT:    frsqrts d3, d0, d3
; CHECK-NEXT:    fmul d1, d1, d3
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    fmul d2, d1, d2
; CHECK-NEXT:    str d1, [x0]
; CHECK-NEXT:    str d2, [x1]
; CHECK-NEXT:    ret
  %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
  %rsqrt = fdiv fast double 1.0, %sqrt
  %r = fdiv fast double 42.0, %sqrt
  %sqrt_fast = fdiv fast double %x, %sqrt
  store double %rsqrt, double* %p1, align 8
  store double %r, double* %p2, align 8
  ret double %sqrt_fast
}

; Variant of the three-use case in which the x/sqrt division comes first in
; the IR and the other two divisions are 42.0/sqrt (stored to %p1) and
; 43.0/sqrt (stored to %p2); there is no 1.0/sqrt in the input.
; Under the FAULT prefix the return value is expected to be the fsqrt result,
; with two fdivs for the stored values. Under the default prefix a single
; frsqrte / three-frsqrts sequence is expected to feed all three results
; through multiplies, with no fdiv.
define double @sqrt_simplify_before_recip_3_uses_order(double %x, double* %p1, double* %p2) nounwind {
; FAULT-LABEL: sqrt_simplify_before_recip_3_uses_order:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    mov x8, #4631107791820423168
; FAULT-NEXT:    fmov d1, x8
; FAULT-NEXT:    mov x8, #140737488355328
; FAULT-NEXT:    movk x8, #16453, lsl #48
; FAULT-NEXT:    fmov d2, x8
; FAULT-NEXT:    fdiv d1, d1, d0
; FAULT-NEXT:    fdiv d2, d2, d0
; FAULT-NEXT:    str d1, [x0]
; FAULT-NEXT:    str d2, [x1]
; FAULT-NEXT:    ret
;
; CHECK-LABEL: sqrt_simplify_before_recip_3_uses_order:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    mov x9, #140737488355328
; CHECK-NEXT:    mov x8, #4631107791820423168
; CHECK-NEXT:    movk x9, #16453, lsl #48
; CHECK-NEXT:    fmov d3, x9
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmov d2, x8
; CHECK-NEXT:    fmul d0, d0, d1
; CHECK-NEXT:    fmul d2, d1, d2
; CHECK-NEXT:    fmul d1, d1, d3
; CHECK-NEXT:    str d2, [x0]
; CHECK-NEXT:    str d1, [x1]
; CHECK-NEXT:    ret
  %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
  %sqrt_fast = fdiv fast double %x, %sqrt
  %r1 = fdiv fast double 42.0, %sqrt
  %r2 = fdiv fast double 43.0, %sqrt
  store double %r1, double* %p1, align 8
  store double %r2, double* %p2, align 8
  ret double %sqrt_fast
}


; One `fast` sqrt(x) used as the divisor of four `fast` fdivs: 1.0/sqrt
; (stored to %p1), 42.0/sqrt (%p2), 43.0/sqrt (%p3) and x/sqrt (returned).
; Under the FAULT prefix the expected code has one fsqrt and one fdiv
; producing the reciprocal, which is then multiplied by the two constants;
; the return value is the fsqrt result.
; Under the default prefix the frsqrte / three-frsqrts sequence supplies the
; stored values, but the returned value is still expected to come from an
; fdiv of x by the zero-guarded (fcsel) square root.
define double @sqrt_simplify_before_recip_4_uses(double %x, double* %p1, double* %p2, double* %p3) nounwind {
; FAULT-LABEL: sqrt_simplify_before_recip_4_uses:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    fmov d1, #1.00000000
; FAULT-NEXT:    mov x9, #140737488355328
; FAULT-NEXT:    mov x8, #4631107791820423168
; FAULT-NEXT:    movk x9, #16453, lsl #48
; FAULT-NEXT:    fmov d2, x8
; FAULT-NEXT:    fmov d3, x9
; FAULT-NEXT:    fdiv d1, d1, d0
; FAULT-NEXT:    fmul d2, d1, d2
; FAULT-NEXT:    fmul d3, d1, d3
; FAULT-NEXT:    str d1, [x0]
; FAULT-NEXT:    str d2, [x1]
; FAULT-NEXT:    str d3, [x2]
; FAULT-NEXT:    ret
;
; CHECK-LABEL: sqrt_simplify_before_recip_4_uses:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fcmp d0, #0.0
; CHECK-NEXT:    mov x9, #140737488355328
; CHECK-NEXT:    mov x8, #4631107791820423168
; CHECK-NEXT:    movk x9, #16453, lsl #48
; CHECK-NEXT:    fmov d3, x9
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d0, d1
; CHECK-NEXT:    fmul d3, d1, d3
; CHECK-NEXT:    str d1, [x0]
; CHECK-NEXT:    fcsel d2, d0, d2, eq
; CHECK-NEXT:    fdiv d0, d0, d2
; CHECK-NEXT:    fmov d2, x8
; CHECK-NEXT:    fmul d2, d1, d2
; CHECK-NEXT:    str d2, [x1]
; CHECK-NEXT:    str d3, [x2]
; CHECK-NEXT:    ret
  %sqrt = tail call fast double @llvm.sqrt.f64(double %x)
  %rsqrt = fdiv fast double 1.0, %sqrt
  %r1 = fdiv fast double 42.0, %sqrt
  %r2 = fdiv fast double 43.0, %sqrt
  %sqrt_fast = fdiv fast double %x, %sqrt
  store double %rsqrt, double* %p1, align 8
  store double %r1, double* %p2, align 8
  store double %r2, double* %p3, align 8
  ret double %sqrt_fast
}

; Attribute groups referenced by the functions and declarations in this file.
; #0 sets "unsafe-fp-math"="true"; #1 sets the same and additionally
; "denormal-fp-math"="ieee" (used by the *_ieee_denorms tests).
attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" }
