; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s

; Declarations of the llvm.sqrt intrinsic for every scalar and vector type
; that the functions in this file call (f32, v2f32, v4f32, v8f32, f64, v2f64,
; v4f64).
declare float @llvm.sqrt.f32(float) #0
declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
declare double @llvm.sqrt.f64(double) #0
declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0

; Scalar f32 square root called with the `fast` flag.
; With -use-reciprocal-square-root (FAULT prefix) a single fsqrt is expected.
; With +use-reciprocal-square-root (default prefix) the expected code is an
; frsqrte estimate refined by two frsqrts steps, multiplied by the input, and
; then an fcmp/fcsel that returns the input itself when it compares equal to 0.0.
define float @fsqrt(float %a) #0 {
; FAULT-LABEL: fsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt s0, s0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: fsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte s1, s0
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s2, s2, s0
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fcmp s0, #0.0
; CHECK-NEXT:    fcsel s0, s0, s1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  ret float %1
}

; Same scalar f32 fast square root as @fsqrt, but compiled under attribute
; group #1, which additionally sets "denormal-fp-math"="ieee".
; The assertions below expect exactly the same code as for @fsqrt in both
; configurations: a lone fsqrt without the feature, and the
; frsqrte/frsqrts sequence with the zero-input fcmp/fcsel when it is enabled.
define float @fsqrt_ieee_denorms(float %a) #1 {
; FAULT-LABEL: fsqrt_ieee_denorms:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt s0, s0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: fsqrt_ieee_denorms:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte s1, s0
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s2, s2, s0
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fcmp s0, #0.0
; CHECK-NEXT:    fcsel s0, s0, s1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  ret float %1
}

; <2 x float> square root called with the `fast` flag.
; With -use-reciprocal-square-root (FAULT prefix) a single vector fsqrt on
; v0.2s is expected. With the feature enabled (default prefix) the expected
; code is frsqrte plus two frsqrts refinement steps, a multiply by the input,
; and an fcmeq/bsl pair that keeps the input in lanes comparing equal to 0.0;
; the selected value is then moved into v0.
define <2 x float> @f2sqrt(<2 x float> %a) #0 {
; FAULT-LABEL: f2sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2s, v0.2s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f2sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2s, v0.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v2.2s, v2.2s, v0.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v2.2s
; CHECK-NEXT:    fcmeq v1.2s, v0.2s, #0.0
; CHECK-NEXT:    bsl v1.8b, v0.8b, v2.8b
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  ret <2 x float> %1
}

; <4 x float> square root called with the `fast` flag.
; With -use-reciprocal-square-root (FAULT prefix) a single vector fsqrt on
; v0.4s is expected. With the feature enabled (default prefix) the expected
; code is frsqrte plus two frsqrts refinement steps, a multiply by the input,
; and an fcmeq/bsl pair that keeps the input in lanes comparing equal to 0.0;
; the selected value is then moved into v0.
define <4 x float> @f4sqrt(<4 x float> %a) #0 {
; FAULT-LABEL: f4sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f4sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.4s, v0.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v2.4s, v2.4s, v0.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v2.4s
; CHECK-NEXT:    fcmeq v1.4s, v0.4s, #0.0
; CHECK-NEXT:    bsl v1.16b, v0.16b, v2.16b
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  ret <4 x float> %1
}

; <8 x float> square root called with the `fast` flag. The expected code
; handles the value as two .4s halves held in v0 and v1.
; With -use-reciprocal-square-root (FAULT prefix) one fsqrt per half is
; expected. With the feature enabled (default prefix) each half gets its own
; frsqrte + two frsqrts refinement steps, multiply by the input, and
; fcmeq/bsl select that keeps the input in lanes comparing equal to 0.0; the
; two results are finally moved into v0 and v1.
define <8 x float> @f8sqrt(<8 x float> %a) #0 {
; FAULT-LABEL: f8sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    fsqrt v1.4s, v1.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f8sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.4s, v0.4s
; CHECK-NEXT:    fmul v3.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrts v3.4s, v0.4s, v3.4s
; CHECK-NEXT:    fmul v2.4s, v2.4s, v3.4s
; CHECK-NEXT:    fmul v3.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrts v3.4s, v0.4s, v3.4s
; CHECK-NEXT:    fmul v3.4s, v3.4s, v0.4s
; CHECK-NEXT:    fmul v3.4s, v2.4s, v3.4s
; CHECK-NEXT:    fcmeq v2.4s, v0.4s, #0.0
; CHECK-NEXT:    bsl v2.16b, v0.16b, v3.16b
; CHECK-NEXT:    frsqrte v0.4s, v1.4s
; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
; CHECK-NEXT:    frsqrts v3.4s, v1.4s, v3.4s
; CHECK-NEXT:    fmul v0.4s, v0.4s, v3.4s
; CHECK-NEXT:    fmul v3.4s, v0.4s, v0.4s
; CHECK-NEXT:    frsqrts v3.4s, v1.4s, v3.4s
; CHECK-NEXT:    fmul v3.4s, v3.4s, v1.4s
; CHECK-NEXT:    fmul v0.4s, v0.4s, v3.4s
; CHECK-NEXT:    fcmeq v3.4s, v1.4s, #0.0
; CHECK-NEXT:    bsl v3.16b, v1.16b, v0.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v1.16b, v3.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  ret <8 x float> %1
}

; Scalar f64 square root called with the `fast` flag.
; With -use-reciprocal-square-root (FAULT prefix) a single fsqrt is expected.
; With +use-reciprocal-square-root (default prefix) the expected code is an
; frsqrte estimate refined by three frsqrts steps (one more than the f32
; tests in this file), multiplied by the input, and then an fcmp/fcsel that
; returns the input itself when it compares equal to 0.0.
define double @dsqrt(double %a) #0 {
; FAULT-LABEL: dsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: dsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d2, d2, d0
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fcmp d0, #0.0
; CHECK-NEXT:    fcsel d0, d0, d1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  ret double %1
}

; Same scalar f64 fast square root as @dsqrt, but compiled under attribute
; group #1, which additionally sets "denormal-fp-math"="ieee".
; The assertions below expect exactly the same code as for @dsqrt in both
; configurations: a lone fsqrt without the feature, and the frsqrte + three
; frsqrts sequence with the zero-input fcmp/fcsel when it is enabled.
define double @dsqrt_ieee_denorms(double %a) #1 {
; FAULT-LABEL: dsqrt_ieee_denorms:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: dsqrt_ieee_denorms:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d2, d2, d0
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fcmp d0, #0.0
; CHECK-NEXT:    fcsel d0, d0, d1, eq
; CHECK-NEXT:    ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  ret double %1
}

; <2 x double> square root called with the `fast` flag.
; With -use-reciprocal-square-root (FAULT prefix) a single vector fsqrt on
; v0.2d is expected. With the feature enabled (default prefix) the expected
; code is frsqrte plus three frsqrts refinement steps, a multiply by the
; input, and an fcmeq/bsl pair that keeps the input in lanes comparing equal
; to 0.0; the selected value is then moved into v0.
define <2 x double> @d2sqrt(<2 x double> %a) #0 {
; FAULT-LABEL: d2sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d2sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2d, v0.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v0.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v2.2d
; CHECK-NEXT:    fcmeq v1.2d, v0.2d, #0.0
; CHECK-NEXT:    bsl v1.16b, v0.16b, v2.16b
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  ret <2 x double> %1
}

; <4 x double> square root called with the `fast` flag. The expected code
; handles the value as two .2d halves held in v0 and v1.
; With -use-reciprocal-square-root (FAULT prefix) one fsqrt per half is
; expected. With the feature enabled (default prefix) each half gets its own
; frsqrte + three frsqrts refinement steps, multiply by the input, and
; fcmeq/bsl select that keeps the input in lanes comparing equal to 0.0; the
; two results are finally moved into v0 and v1.
define <4 x double> @d4sqrt(<4 x double> %a) #0 {
; FAULT-LABEL: d4sqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    fsqrt v1.2d, v1.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d4sqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.2d, v0.2d
; CHECK-NEXT:    fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v3.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v3.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v3.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v0.2d
; CHECK-NEXT:    fmul v3.2d, v2.2d, v3.2d
; CHECK-NEXT:    fcmeq v2.2d, v0.2d, #0.0
; CHECK-NEXT:    bsl v2.16b, v0.16b, v3.16b
; CHECK-NEXT:    frsqrte v0.2d, v1.2d
; CHECK-NEXT:    fmul v3.2d, v0.2d, v0.2d
; CHECK-NEXT:    frsqrts v3.2d, v1.2d, v3.2d
; CHECK-NEXT:    fmul v0.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v0.2d, v0.2d
; CHECK-NEXT:    frsqrts v3.2d, v1.2d, v3.2d
; CHECK-NEXT:    fmul v0.2d, v0.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v0.2d, v0.2d
; CHECK-NEXT:    frsqrts v3.2d, v1.2d, v3.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v1.2d
; CHECK-NEXT:    fmul v0.2d, v0.2d, v3.2d
; CHECK-NEXT:    fcmeq v3.2d, v1.2d, #0.0
; CHECK-NEXT:    bsl v3.16b, v1.16b, v0.16b
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v1.16b, v3.16b
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  ret <4 x double> %1
}

; Scalar f32 reciprocal square root: 1.0 / sqrt(a), both operations `fast`.
; With -use-reciprocal-square-root (FAULT prefix) the expected code is fsqrt,
; an fmov of the constant 1.0, and fdiv. With the feature enabled (default
; prefix) the expected code is an frsqrte estimate refined by two frsqrts
; steps; unlike the plain sqrt tests in this file there is no multiply by the
; input and no compare/select against 0.0.
define float @frsqrt(float %a) #0 {
; FAULT-LABEL: frsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt s0, s0
; FAULT-NEXT:    fmov s1, #1.00000000
; FAULT-NEXT:    fdiv s0, s1, s0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: frsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte s1, s0
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s2, s0, s2
; CHECK-NEXT:    fmul s1, s1, s2
; CHECK-NEXT:    fmul s2, s1, s1
; CHECK-NEXT:    frsqrts s0, s0, s2
; CHECK-NEXT:    fmul s0, s1, s0
; CHECK-NEXT:    ret
  %1 = tail call fast float @llvm.sqrt.f32(float %a)
  %2 = fdiv fast float 1.000000e+00, %1
  ret float %2
}

; <2 x float> reciprocal square root: splat(1.0) / sqrt(a), both `fast`.
; With -use-reciprocal-square-root (FAULT prefix) the expected code is a
; vector fsqrt, an fmov of 1.0 into every lane, and fdiv. With the feature
; enabled (default prefix) the expected code is frsqrte refined by two
; frsqrts steps, with no multiply by the input and no select against 0.0.
define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
; FAULT-LABEL: f2rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2s, v0.2s
; FAULT-NEXT:    fmov v1.2s, #1.00000000
; FAULT-NEXT:    fdiv v0.2s, v1.2s, v0.2s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f2rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2s, v0.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v2.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v1.2s, v1.2s, v2.2s
; CHECK-NEXT:    fmul v2.2s, v1.2s, v1.2s
; CHECK-NEXT:    frsqrts v0.2s, v0.2s, v2.2s
; CHECK-NEXT:    fmul v0.2s, v1.2s, v0.2s
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %a)
  %2 = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %1
  ret <2 x float> %2
}

; <4 x float> reciprocal square root: splat(1.0) / sqrt(a), both `fast`.
; With -use-reciprocal-square-root (FAULT prefix) the expected code is a
; vector fsqrt, an fmov of 1.0 into every lane, and fdiv. With the feature
; enabled (default prefix) the expected code is frsqrte refined by two
; frsqrts steps, with no multiply by the input and no select against 0.0.
define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
; FAULT-LABEL: f4rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    fmov v1.4s, #1.00000000
; FAULT-NEXT:    fdiv v0.4s, v1.4s, v0.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f4rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.4s, v0.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v2.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v1.4s, v1.4s, v2.4s
; CHECK-NEXT:    fmul v2.4s, v1.4s, v1.4s
; CHECK-NEXT:    frsqrts v0.4s, v0.4s, v2.4s
; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %a)
  %2 = fdiv fast <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <4 x float> %2
}

; <8 x float> reciprocal square root: splat(1.0) / sqrt(a), both `fast`. The
; expected code handles the value as two .4s halves held in v0 and v1.
; With -use-reciprocal-square-root (FAULT prefix) the expected code is one
; fsqrt per half, a single fmov of 1.0 shared by both halves, and one fdiv per
; half. With the feature enabled (default prefix) each half gets frsqrte
; refined by two frsqrts steps, and the instructions of the two halves are
; interleaved in the expected output.
define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
; FAULT-LABEL: f8rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v1.4s, v1.4s
; FAULT-NEXT:    fsqrt v0.4s, v0.4s
; FAULT-NEXT:    fmov v2.4s, #1.00000000
; FAULT-NEXT:    fdiv v0.4s, v2.4s, v0.4s
; FAULT-NEXT:    fdiv v1.4s, v2.4s, v1.4s
; FAULT-NEXT:    ret
;
; CHECK-LABEL: f8rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.4s, v0.4s
; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrte v3.4s, v1.4s
; CHECK-NEXT:    frsqrts v4.4s, v0.4s, v4.4s
; CHECK-NEXT:    fmul v2.4s, v2.4s, v4.4s
; CHECK-NEXT:    fmul v4.4s, v3.4s, v3.4s
; CHECK-NEXT:    frsqrts v4.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v3.4s, v3.4s, v4.4s
; CHECK-NEXT:    fmul v4.4s, v2.4s, v2.4s
; CHECK-NEXT:    frsqrts v0.4s, v0.4s, v4.4s
; CHECK-NEXT:    fmul v4.4s, v3.4s, v3.4s
; CHECK-NEXT:    frsqrts v1.4s, v1.4s, v4.4s
; CHECK-NEXT:    fmul v0.4s, v2.4s, v0.4s
; CHECK-NEXT:    fmul v1.4s, v3.4s, v1.4s
; CHECK-NEXT:    ret
  %1 = tail call fast <8 x float> @llvm.sqrt.v8f32(<8 x float> %a)
  %2 = fdiv fast <8 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %1
  ret <8 x float> %2
}

; Scalar f64 reciprocal square root: 1.0 / sqrt(a), both operations `fast`.
; With -use-reciprocal-square-root (FAULT prefix) the expected code is fsqrt,
; an fmov of the constant 1.0, and fdiv. With the feature enabled (default
; prefix) the expected code is an frsqrte estimate refined by three frsqrts
; steps, with no multiply by the input and no compare/select against 0.0.
define double @drsqrt(double %a) #0 {
; FAULT-LABEL: drsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt d0, d0
; FAULT-NEXT:    fmov d1, #1.00000000
; FAULT-NEXT:    fdiv d0, d1, d0
; FAULT-NEXT:    ret
;
; CHECK-LABEL: drsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte d1, d0
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d2, d0, d2
; CHECK-NEXT:    fmul d1, d1, d2
; CHECK-NEXT:    fmul d2, d1, d1
; CHECK-NEXT:    frsqrts d0, d0, d2
; CHECK-NEXT:    fmul d0, d1, d0
; CHECK-NEXT:    ret
  %1 = tail call fast double @llvm.sqrt.f64(double %a)
  %2 = fdiv fast double 1.000000e+00, %1
  ret double %2
}

; <2 x double> reciprocal square root: splat(1.0) / sqrt(a), both `fast`.
; With -use-reciprocal-square-root (FAULT prefix) the expected code is a
; vector fsqrt, an fmov of 1.0 into every lane, and fdiv. With the feature
; enabled (default prefix) the expected code is frsqrte refined by three
; frsqrts steps, with no multiply by the input and no select against 0.0.
define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
; FAULT-LABEL: d2rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    fmov v1.2d, #1.00000000
; FAULT-NEXT:    fdiv v0.2d, v1.2d, v0.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d2rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v1.2d, v0.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v2.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT:    fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT:    frsqrts v0.2d, v0.2d, v2.2d
; CHECK-NEXT:    fmul v0.2d, v1.2d, v0.2d
; CHECK-NEXT:    ret
  %1 = tail call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %a)
  %2 = fdiv fast <2 x double> <double 1.000000e+00, double 1.000000e+00>, %1
  ret <2 x double> %2
}

; <4 x double> reciprocal square root: splat(1.0) / sqrt(a), both `fast`. The
; expected code handles the value as two .2d halves held in v0 and v1.
; With -use-reciprocal-square-root (FAULT prefix) the expected code is one
; fsqrt per half, a single fmov of 1.0 shared by both halves, and one fdiv per
; half. With the feature enabled (default prefix) each half gets frsqrte
; refined by three frsqrts steps, and the instructions of the two halves are
; interleaved in the expected output.
define <4 x double> @d4rsqrt(<4 x double> %a) #0 {
; FAULT-LABEL: d4rsqrt:
; FAULT:       // %bb.0:
; FAULT-NEXT:    fsqrt v1.2d, v1.2d
; FAULT-NEXT:    fsqrt v0.2d, v0.2d
; FAULT-NEXT:    fmov v2.2d, #1.00000000
; FAULT-NEXT:    fdiv v0.2d, v2.2d, v0.2d
; FAULT-NEXT:    fdiv v1.2d, v2.2d, v1.2d
; FAULT-NEXT:    ret
;
; CHECK-LABEL: d4rsqrt:
; CHECK:       // %bb.0:
; CHECK-NEXT:    frsqrte v2.2d, v0.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrte v3.2d, v1.2d
; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v4.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v4.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v2.2d, v2.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v4.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v3.2d, v3.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v2.2d, v2.2d
; CHECK-NEXT:    frsqrts v0.2d, v0.2d, v4.2d
; CHECK-NEXT:    fmul v4.2d, v3.2d, v3.2d
; CHECK-NEXT:    frsqrts v1.2d, v1.2d, v4.2d
; CHECK-NEXT:    fmul v0.2d, v2.2d, v0.2d
; CHECK-NEXT:    fmul v1.2d, v3.2d, v1.2d
; CHECK-NEXT:    ret
  %1 = tail call fast <4 x double> @llvm.sqrt.v4f64(<4 x double> %a)
  %2 = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, %1
  ret <4 x double> %2
}

; #0 is attached to the llvm.sqrt declarations and to every test function
;    except the two *_ieee_denorms ones.
; #1 is #0 plus "denormal-fp-math"="ieee"; it is attached to
;    @fsqrt_ieee_denorms and @dsqrt_ieee_denorms.
attributes #0 = { "unsafe-fp-math"="true" }
attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" }