1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s
3; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s
4
5; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
6; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s
7
8; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
9; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
10
11; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
12; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
13
; Baseline case - a double-precision fdiv with no fast-math flags and no
; !fpmath metadata. Every per-target check body below expects the full
; expansion, namely two v_div_scale_f64, a v_rcp_f64, a chain of v_fma_f64
; with one v_mul_f64, then v_div_fmas_f64 and v_div_fixup_f64.
; In the tahiti body both v_div_scale_f64 write their mask to an SGPR pair and
; vcc is instead built from two v_cmp_eq_u32 plus an s_xor_b64, whereas the
; other targets take vcc or vcc_lo directly from the second v_div_scale_f64.
; NOTE(review) presumably a workaround specific to that generation - confirm
; against the legalizer before relying on this description.
14define double @v_fdiv_f64(double %a, double %b) {
15; GFX6-LABEL: v_fdiv_f64:
16; GFX6:       ; %bb.0:
17; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
18; GFX6-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
19; GFX6-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1]
20; GFX6-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
21; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
22; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v11
23; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
24; GFX6-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
25; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
26; GFX6-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
27; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
28; GFX6-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
29; GFX6-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
30; GFX6-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
31; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
32; GFX6-NEXT:    s_setpc_b64 s[30:31]
33;
34; GFX8-LABEL: v_fdiv_f64:
35; GFX8:       ; %bb.0:
36; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX8-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
38; GFX8-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
39; GFX8-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
40; GFX8-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
41; GFX8-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
42; GFX8-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
43; GFX8-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
44; GFX8-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
45; GFX8-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
46; GFX8-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
47; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
48; GFX8-NEXT:    s_setpc_b64 s[30:31]
49;
50; GFX9-LABEL: v_fdiv_f64:
51; GFX9:       ; %bb.0:
52; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
53; GFX9-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
54; GFX9-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
55; GFX9-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
56; GFX9-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
57; GFX9-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
58; GFX9-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
59; GFX9-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
60; GFX9-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
61; GFX9-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
62; GFX9-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
63; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
64; GFX9-NEXT:    s_setpc_b64 s[30:31]
65;
66; GFX10-LABEL: v_fdiv_f64:
67; GFX10:       ; %bb.0:
68; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
70; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1]
71; GFX10-NEXT:    v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
72; GFX10-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
73; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
74; GFX10-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
75; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
76; GFX10-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
77; GFX10-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
78; GFX10-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
79; GFX10-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
80; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
81; GFX10-NEXT:    s_setpc_b64 s[30:31]
82  %fdiv = fdiv double %a, %b
83  ret double %fdiv
84}
85
; fdiv carrying only the afn fast-math flag. The expected code applies
; v_rcp_f64 directly to the divisor, runs two pairs of v_fma_f64 on that
; reciprocal, multiplies by the numerator and finishes with two more
; v_fma_f64. No v_div_scale_f64, v_div_fmas_f64 or v_div_fixup_f64 is expected.
; The tahiti, fiji and gfx900 runs share one check body through the common GCN
; prefix, while gfx1010 has its own body that adds an s_waitcnt_vscnt.
86define double @v_fdiv_f64_afn(double %a, double %b) {
87; GCN-LABEL: v_fdiv_f64_afn:
88; GCN:       ; %bb.0:
89; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GCN-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
91; GCN-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
92; GCN-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
93; GCN-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
94; GCN-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
95; GCN-NEXT:    v_mul_f64 v[6:7], v[0:1], v[4:5]
96; GCN-NEXT:    v_fma_f64 v[0:1], -v[2:3], v[6:7], v[0:1]
97; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[6:7]
98; GCN-NEXT:    s_setpc_b64 s[30:31]
99;
100; GFX10-LABEL: v_fdiv_f64_afn:
101; GFX10:       ; %bb.0:
102; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
103; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
104; GFX10-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
105; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
106; GFX10-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
107; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
108; GFX10-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
109; GFX10-NEXT:    v_mul_f64 v[6:7], v[0:1], v[4:5]
110; GFX10-NEXT:    v_fma_f64 v[0:1], -v[2:3], v[6:7], v[0:1]
111; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[6:7]
112; GFX10-NEXT:    s_setpc_b64 s[30:31]
113  %fdiv = fdiv afn double %a, %b
114  ret double %fdiv
115}
116
; fdiv with no fast-math flags but with !fpmath !0 attached. The expected code
; on every target is line-for-line the same as for the unannotated
; v_fdiv_f64, so the metadata is not expected to change f64 lowering here.
; NOTE(review) the function name suggests !0 is a 2.5 ulp bound, but the
; metadata node is defined outside the visible part of this file - confirm.
117define double @v_fdiv_f64_ulp25(double %a, double %b) {
118; GFX6-LABEL: v_fdiv_f64_ulp25:
119; GFX6:       ; %bb.0:
120; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
121; GFX6-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
122; GFX6-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1]
123; GFX6-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
124; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
125; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v11
126; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
127; GFX6-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
128; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
129; GFX6-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
130; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
131; GFX6-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
132; GFX6-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
133; GFX6-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
134; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
135; GFX6-NEXT:    s_setpc_b64 s[30:31]
136;
137; GFX8-LABEL: v_fdiv_f64_ulp25:
138; GFX8:       ; %bb.0:
139; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140; GFX8-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
141; GFX8-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
142; GFX8-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
143; GFX8-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
144; GFX8-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
145; GFX8-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
146; GFX8-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
147; GFX8-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
148; GFX8-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
149; GFX8-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
150; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
151; GFX8-NEXT:    s_setpc_b64 s[30:31]
152;
153; GFX9-LABEL: v_fdiv_f64_ulp25:
154; GFX9:       ; %bb.0:
155; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
156; GFX9-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
157; GFX9-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
158; GFX9-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
159; GFX9-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
160; GFX9-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
161; GFX9-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
162; GFX9-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
163; GFX9-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
164; GFX9-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
165; GFX9-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
166; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
167; GFX9-NEXT:    s_setpc_b64 s[30:31]
168;
169; GFX10-LABEL: v_fdiv_f64_ulp25:
170; GFX10:       ; %bb.0:
171; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
172; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
173; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1]
174; GFX10-NEXT:    v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
175; GFX10-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
176; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
177; GFX10-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
178; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
179; GFX10-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
180; GFX10-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
181; GFX10-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
182; GFX10-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
183; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
184; GFX10-NEXT:    s_setpc_b64 s[30:31]
185  %fdiv = fdiv double %a, %b, !fpmath !0
186  ret double %fdiv
187}
188
; Reciprocal written as fdiv with a constant 1.0 numerator and no fast-math
; flags. The expected code is still the full v_div_scale_f64 / v_rcp_f64 /
; v_fma_f64 / v_div_fmas_f64 / v_div_fixup_f64 expansion, with 1.0 appearing
; as an inline operand in place of a numerator register pair.
; The tahiti body additionally loads 0x3ff00000 (the high dword of the f64
; value 1.0) into a VGPR so it can be compared against the high dword of the
; scaled numerator when vcc is computed with v_cmp_eq_u32 and s_xor_b64.
189define double @v_rcp_f64(double %x) {
190; GFX6-LABEL: v_rcp_f64:
191; GFX6:       ; %bb.0:
192; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
193; GFX6-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
194; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
195; GFX6-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
196; GFX6-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
197; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
198; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
199; GFX6-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
200; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
201; GFX6-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
202; GFX6-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
203; GFX6-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
204; GFX6-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
205; GFX6-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
206; GFX6-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
207; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
208; GFX6-NEXT:    s_setpc_b64 s[30:31]
209;
210; GFX8-LABEL: v_rcp_f64:
211; GFX8:       ; %bb.0:
212; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
213; GFX8-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
214; GFX8-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
215; GFX8-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
216; GFX8-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
217; GFX8-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
218; GFX8-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
219; GFX8-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
220; GFX8-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
221; GFX8-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
222; GFX8-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
223; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
224; GFX8-NEXT:    s_setpc_b64 s[30:31]
225;
226; GFX9-LABEL: v_rcp_f64:
227; GFX9:       ; %bb.0:
228; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
229; GFX9-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
230; GFX9-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
231; GFX9-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
232; GFX9-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
233; GFX9-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
234; GFX9-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
235; GFX9-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
236; GFX9-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
237; GFX9-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
238; GFX9-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
239; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
240; GFX9-NEXT:    s_setpc_b64 s[30:31]
241;
242; GFX10-LABEL: v_rcp_f64:
243; GFX10:       ; %bb.0:
244; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
245; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
246; GFX10-NEXT:    v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0
247; GFX10-NEXT:    v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
248; GFX10-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
249; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
250; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
251; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
252; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
253; GFX10-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
254; GFX10-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
255; GFX10-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
256; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
257; GFX10-NEXT:    s_setpc_b64 s[30:31]
258  %fdiv = fdiv double 1.0, %x
259  ret double %fdiv
260}
261
; Reciprocal (constant 1.0 numerator) carrying only the arcp fast-math flag.
; The expected code on every target is line-for-line the same as for the
; unflagged v_rcp_f64, so arcp alone is not expected to shorten the f64
; expansion.
262define double @v_rcp_f64_arcp(double %x) {
263; GFX6-LABEL: v_rcp_f64_arcp:
264; GFX6:       ; %bb.0:
265; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266; GFX6-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
267; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
268; GFX6-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
269; GFX6-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
270; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
271; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
272; GFX6-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
273; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
274; GFX6-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
275; GFX6-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
276; GFX6-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
277; GFX6-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
278; GFX6-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
279; GFX6-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
280; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
281; GFX6-NEXT:    s_setpc_b64 s[30:31]
282;
283; GFX8-LABEL: v_rcp_f64_arcp:
284; GFX8:       ; %bb.0:
285; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
286; GFX8-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
287; GFX8-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
288; GFX8-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
289; GFX8-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
290; GFX8-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
291; GFX8-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
292; GFX8-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
293; GFX8-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
294; GFX8-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
295; GFX8-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
296; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
297; GFX8-NEXT:    s_setpc_b64 s[30:31]
298;
299; GFX9-LABEL: v_rcp_f64_arcp:
300; GFX9:       ; %bb.0:
301; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
302; GFX9-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
303; GFX9-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
304; GFX9-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
305; GFX9-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
306; GFX9-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
307; GFX9-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
308; GFX9-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
309; GFX9-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
310; GFX9-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
311; GFX9-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
312; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
313; GFX9-NEXT:    s_setpc_b64 s[30:31]
314;
315; GFX10-LABEL: v_rcp_f64_arcp:
316; GFX10:       ; %bb.0:
317; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
319; GFX10-NEXT:    v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0
320; GFX10-NEXT:    v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
321; GFX10-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
322; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
323; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
324; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
325; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
326; GFX10-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
327; GFX10-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
328; GFX10-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
329; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
330; GFX10-NEXT:    s_setpc_b64 s[30:31]
331  %fdiv = fdiv arcp double 1.0, %x
332  ret double %fdiv
333}
334
; Reciprocal (constant 1.0 numerator) carrying both arcp and afn. The expected
; code is the short form, v_rcp_f64 applied directly to the input followed by
; v_fma_f64 steps, with no v_div_scale_f64, v_div_fmas_f64 or v_div_fixup_f64.
; Note that the checks still expect an explicit v_mul_f64 by the inline
; constant 1.0, i.e. the multiply by the constant numerator is not folded away.
; The tahiti, fiji and gfx900 runs share the GCN body, gfx1010 has its own.
335define double @v_rcp_f64_arcp_afn(double %x) {
336; GCN-LABEL: v_rcp_f64_arcp_afn:
337; GCN:       ; %bb.0:
338; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339; GCN-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
340; GCN-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
341; GCN-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
342; GCN-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
343; GCN-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
344; GCN-NEXT:    v_mul_f64 v[4:5], 1.0, v[2:3]
345; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
346; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
347; GCN-NEXT:    s_setpc_b64 s[30:31]
348;
349; GFX10-LABEL: v_rcp_f64_arcp_afn:
350; GFX10:       ; %bb.0:
351; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
352; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
353; GFX10-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
354; GFX10-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
355; GFX10-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
356; GFX10-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
357; GFX10-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
358; GFX10-NEXT:    v_mul_f64 v[4:5], 1.0, v[2:3]
359; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
360; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
361; GFX10-NEXT:    s_setpc_b64 s[30:31]
362  %fdiv = fdiv arcp afn double 1.0, %x
363  ret double %fdiv
364}
365
; Reciprocal (constant 1.0 numerator) with no fast-math flags but with
; !fpmath !0 attached. The expected code on every target is line-for-line the
; same as for the unannotated v_rcp_f64, so the metadata is not expected to
; change f64 lowering here.
; NOTE(review) the function name suggests !0 is a 2.5 ulp bound, but the
; metadata node is defined outside the visible part of this file - confirm.
366define double @v_rcp_f64_ulp25(double %x) {
367; GFX6-LABEL: v_rcp_f64_ulp25:
368; GFX6:       ; %bb.0:
369; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370; GFX6-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
371; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0
372; GFX6-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
373; GFX6-NEXT:    v_mov_b32_e32 v10, 0x3ff00000
374; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v10, v9
375; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v3
376; GFX6-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
377; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
378; GFX6-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
379; GFX6-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
380; GFX6-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
381; GFX6-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
382; GFX6-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
383; GFX6-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
384; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
385; GFX6-NEXT:    s_setpc_b64 s[30:31]
386;
387; GFX8-LABEL: v_rcp_f64_ulp25:
388; GFX8:       ; %bb.0:
389; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
390; GFX8-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
391; GFX8-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
392; GFX8-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
393; GFX8-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
394; GFX8-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
395; GFX8-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
396; GFX8-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
397; GFX8-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
398; GFX8-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
399; GFX8-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
400; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
401; GFX8-NEXT:    s_setpc_b64 s[30:31]
402;
403; GFX9-LABEL: v_rcp_f64_ulp25:
404; GFX9:       ; %bb.0:
405; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406; GFX9-NEXT:    v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0
407; GFX9-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
408; GFX9-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
409; GFX9-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
410; GFX9-NEXT:    v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0
411; GFX9-NEXT:    v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0
412; GFX9-NEXT:    v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5]
413; GFX9-NEXT:    v_mul_f64 v[8:9], v[6:7], v[4:5]
414; GFX9-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7]
415; GFX9-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9]
416; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
417; GFX9-NEXT:    s_setpc_b64 s[30:31]
418;
419; GFX10-LABEL: v_rcp_f64_ulp25:
420; GFX10:       ; %bb.0:
421; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
423; GFX10-NEXT:    v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0
424; GFX10-NEXT:    v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0
425; GFX10-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
426; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
427; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
428; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
429; GFX10-NEXT:    v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5]
430; GFX10-NEXT:    v_mul_f64 v[6:7], v[8:9], v[4:5]
431; GFX10-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9]
432; GFX10-NEXT:    v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7]
433; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0
434; GFX10-NEXT:    s_setpc_b64 s[30:31]
435  %fdiv = fdiv double 1.0, %x, !fpmath !0
436  ret double %fdiv
437}
438
; fdiv carrying the afn flag together with !fpmath !0. The expected code is
; line-for-line the same short v_rcp_f64 plus v_fma_f64 sequence as for
; v_fdiv_f64_afn, so adding the metadata on top of afn is not expected to
; change the output.
; NOTE(review) the function name suggests !0 is a 2.5 ulp bound, but the
; metadata node is defined outside the visible part of this file - confirm.
439define double @v_fdiv_f64_afn_ulp25(double %a, double %b) {
440; GCN-LABEL: v_fdiv_f64_afn_ulp25:
441; GCN:       ; %bb.0:
442; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
443; GCN-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
444; GCN-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
445; GCN-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
446; GCN-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
447; GCN-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
448; GCN-NEXT:    v_mul_f64 v[6:7], v[0:1], v[4:5]
449; GCN-NEXT:    v_fma_f64 v[0:1], -v[2:3], v[6:7], v[0:1]
450; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[6:7]
451; GCN-NEXT:    s_setpc_b64 s[30:31]
452;
453; GFX10-LABEL: v_fdiv_f64_afn_ulp25:
454; GFX10:       ; %bb.0:
455; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
457; GFX10-NEXT:    v_rcp_f64_e32 v[4:5], v[2:3]
458; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
459; GFX10-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
460; GFX10-NEXT:    v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0
461; GFX10-NEXT:    v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5]
462; GFX10-NEXT:    v_mul_f64 v[6:7], v[0:1], v[4:5]
463; GFX10-NEXT:    v_fma_f64 v[0:1], -v[2:3], v[6:7], v[0:1]
464; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[6:7]
465; GFX10-NEXT:    s_setpc_b64 s[30:31]
466  %fdiv = fdiv afn double %a, %b, !fpmath !0
467  ret double %fdiv
468}
469
; fdiv with a variable numerator carrying the arcp flag together with
; !fpmath !0. The expected code on every target is line-for-line the same full
; expansion as for the unflagged v_fdiv_f64, so neither arcp nor the metadata
; is expected to shorten the f64 sequence.
; NOTE(review) the function name suggests !0 is a 2.5 ulp bound, but the
; metadata node is defined outside the visible part of this file - confirm.
470define double @v_fdiv_f64_arcp_ulp25(double %a, double %b) {
471; GFX6-LABEL: v_fdiv_f64_arcp_ulp25:
472; GFX6:       ; %bb.0:
473; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474; GFX6-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
475; GFX6-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1]
476; GFX6-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
477; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v5
478; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v11
479; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
480; GFX6-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
481; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
482; GFX6-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
483; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
484; GFX6-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
485; GFX6-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
486; GFX6-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
487; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
488; GFX6-NEXT:    s_setpc_b64 s[30:31]
489;
490; GFX8-LABEL: v_fdiv_f64_arcp_ulp25:
491; GFX8:       ; %bb.0:
492; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
493; GFX8-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
494; GFX8-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
495; GFX8-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
496; GFX8-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
497; GFX8-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
498; GFX8-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
499; GFX8-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
500; GFX8-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
501; GFX8-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
502; GFX8-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
503; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
504; GFX8-NEXT:    s_setpc_b64 s[30:31]
505;
506; GFX9-LABEL: v_fdiv_f64_arcp_ulp25:
507; GFX9:       ; %bb.0:
508; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
509; GFX9-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1]
510; GFX9-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
511; GFX9-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
512; GFX9-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
513; GFX9-NEXT:    v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1]
514; GFX9-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0
515; GFX9-NEXT:    v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7]
516; GFX9-NEXT:    v_mul_f64 v[10:11], v[8:9], v[6:7]
517; GFX9-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9]
518; GFX9-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11]
519; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
520; GFX9-NEXT:    s_setpc_b64 s[30:31]
521;
522; GFX10-LABEL: v_fdiv_f64_arcp_ulp25:
523; GFX10:       ; %bb.0:
524; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
525; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
526; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1]
527; GFX10-NEXT:    v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1]
528; GFX10-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
529; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
530; GFX10-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
531; GFX10-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
532; GFX10-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
533; GFX10-NEXT:    v_mul_f64 v[8:9], v[10:11], v[6:7]
534; GFX10-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11]
535; GFX10-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9]
536; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1]
537; GFX10-NEXT:    s_setpc_b64 s[30:31]
538  %fdiv = fdiv arcp double %a, %b, !fpmath !0
539  ret double %fdiv
540}
541
542define <2 x double> @v_fdiv_v2f64(<2 x double> %a, <2 x double> %b) {
543; GFX6-LABEL: v_fdiv_v2f64:
544; GFX6:       ; %bb.0:
545; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
547; GFX6-NEXT:    v_div_scale_f64 v[14:15], s[4:5], v[6:7], v[6:7], v[2:3]
548; GFX6-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
549; GFX6-NEXT:    v_div_scale_f64 v[18:19], s[4:5], v[0:1], v[4:5], v[0:1]
550; GFX6-NEXT:    v_rcp_f64_e32 v[16:17], v[14:15]
551; GFX6-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0
552; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v19
553; GFX6-NEXT:    v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11]
554; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v9
555; GFX6-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0
556; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
557; GFX6-NEXT:    v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11]
558; GFX6-NEXT:    v_fma_f64 v[12:13], -v[14:15], v[16:17], 1.0
559; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v15
560; GFX6-NEXT:    v_fma_f64 v[12:13], v[16:17], v[12:13], v[16:17]
561; GFX6-NEXT:    v_mul_f64 v[16:17], v[18:19], v[10:11]
562; GFX6-NEXT:    v_fma_f64 v[18:19], -v[8:9], v[16:17], v[18:19]
563; GFX6-NEXT:    v_fma_f64 v[8:9], -v[14:15], v[12:13], 1.0
564; GFX6-NEXT:    v_div_fmas_f64 v[10:11], v[18:19], v[10:11], v[16:17]
565; GFX6-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
566; GFX6-NEXT:    v_div_scale_f64 v[12:13], s[6:7], v[2:3], v[6:7], v[2:3]
567; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[10:11], v[4:5], v[0:1]
568; GFX6-NEXT:    v_mul_f64 v[16:17], v[12:13], v[8:9]
569; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v13
570; GFX6-NEXT:    v_fma_f64 v[18:19], -v[14:15], v[16:17], v[12:13]
571; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
572; GFX6-NEXT:    s_nop 1
573; GFX6-NEXT:    v_div_fmas_f64 v[8:9], v[18:19], v[8:9], v[16:17]
574; GFX6-NEXT:    v_div_fixup_f64 v[2:3], v[8:9], v[6:7], v[2:3]
575; GFX6-NEXT:    s_setpc_b64 s[30:31]
576;
577; GFX8-LABEL: v_fdiv_v2f64:
578; GFX8:       ; %bb.0:
579; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
580; GFX8-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
581; GFX8-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3]
582; GFX8-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
583; GFX8-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
584; GFX8-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
585; GFX8-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
586; GFX8-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
587; GFX8-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
588; GFX8-NEXT:    v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1]
589; GFX8-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
590; GFX8-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
591; GFX8-NEXT:    v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0
592; GFX8-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
593; GFX8-NEXT:    v_mul_f64 v[16:17], v[18:19], v[12:13]
594; GFX8-NEXT:    v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19]
595; GFX8-NEXT:    v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3]
596; GFX8-NEXT:    v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17]
597; GFX8-NEXT:    s_mov_b64 vcc, s[4:5]
598; GFX8-NEXT:    v_mul_f64 v[20:21], v[18:19], v[14:15]
599; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
600; GFX8-NEXT:    v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19]
601; GFX8-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21]
602; GFX8-NEXT:    v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
603; GFX8-NEXT:    s_setpc_b64 s[30:31]
604;
605; GFX9-LABEL: v_fdiv_v2f64:
606; GFX9:       ; %bb.0:
607; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
608; GFX9-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
609; GFX9-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3]
610; GFX9-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
611; GFX9-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
612; GFX9-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
613; GFX9-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
614; GFX9-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
615; GFX9-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
616; GFX9-NEXT:    v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1]
617; GFX9-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
618; GFX9-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
619; GFX9-NEXT:    v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0
620; GFX9-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
621; GFX9-NEXT:    v_mul_f64 v[16:17], v[18:19], v[12:13]
622; GFX9-NEXT:    v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19]
623; GFX9-NEXT:    v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3]
624; GFX9-NEXT:    v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17]
625; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
626; GFX9-NEXT:    v_mul_f64 v[20:21], v[18:19], v[14:15]
627; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
628; GFX9-NEXT:    v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19]
629; GFX9-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21]
630; GFX9-NEXT:    v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
631; GFX9-NEXT:    s_setpc_b64 s[30:31]
632;
633; GFX10-LABEL: v_fdiv_v2f64:
634; GFX10:       ; %bb.0:
635; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
636; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
637; GFX10-NEXT:    v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
638; GFX10-NEXT:    v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
639; GFX10-NEXT:    v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
640; GFX10-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
641; GFX10-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
642; GFX10-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
643; GFX10-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
644; GFX10-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
645; GFX10-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
646; GFX10-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
647; GFX10-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
648; GFX10-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
649; GFX10-NEXT:    v_div_scale_f64 v[16:17], s4, v[2:3], v[6:7], v[2:3]
650; GFX10-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
651; GFX10-NEXT:    v_mul_f64 v[18:19], v[20:21], v[12:13]
652; GFX10-NEXT:    v_mul_f64 v[22:23], v[16:17], v[14:15]
653; GFX10-NEXT:    v_fma_f64 v[8:9], -v[8:9], v[18:19], v[20:21]
654; GFX10-NEXT:    v_fma_f64 v[10:11], -v[10:11], v[22:23], v[16:17]
655; GFX10-NEXT:    v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[18:19]
656; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
657; GFX10-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[22:23]
658; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
659; GFX10-NEXT:    v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
660; GFX10-NEXT:    s_setpc_b64 s[30:31]
661  %fdiv = fdiv <2 x double> %a, %b
662  ret <2 x double> %fdiv
663}
664
; Vector <2 x double> fdiv carrying the `afn` fast-math flag.
; Both check groups below expect, per element, v_rcp_f64 followed by
; v_fma_f64 / v_mul_f64 refinement steps, with no v_div_scale_f64,
; v_div_fmas_f64 or v_div_fixup_f64 in the output.
; The GCN prefix is shared by the tahiti, fiji and gfx900 RUN lines; gfx1010
; is checked on its own and additionally expects s_waitcnt_vscnt on entry.
665define <2 x double> @v_fdiv_v2f64_afn(<2 x double> %a, <2 x double> %b) {
666; GCN-LABEL: v_fdiv_v2f64_afn:
667; GCN:       ; %bb.0:
668; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
669; GCN-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
670; GCN-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
671; GCN-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
672; GCN-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
673; GCN-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
674; GCN-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
675; GCN-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
676; GCN-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
677; GCN-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
678; GCN-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
679; GCN-NEXT:    v_mul_f64 v[12:13], v[0:1], v[8:9]
680; GCN-NEXT:    v_mul_f64 v[14:15], v[2:3], v[10:11]
681; GCN-NEXT:    v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
682; GCN-NEXT:    v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
683; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
684; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
685; GCN-NEXT:    s_setpc_b64 s[30:31]
686;
687; GFX10-LABEL: v_fdiv_v2f64_afn:
688; GFX10:       ; %bb.0:
689; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
690; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
691; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
692; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
693; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
694; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
695; GFX10-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
696; GFX10-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
697; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
698; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
699; GFX10-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
700; GFX10-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
701; GFX10-NEXT:    v_mul_f64 v[12:13], v[0:1], v[8:9]
702; GFX10-NEXT:    v_mul_f64 v[14:15], v[2:3], v[10:11]
703; GFX10-NEXT:    v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
704; GFX10-NEXT:    v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
705; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
706; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
707; GFX10-NEXT:    s_setpc_b64 s[30:31]
708  %fdiv = fdiv afn <2 x double> %a, %b
709  ret <2 x double> %fdiv
710}
711
; Vector <2 x double> fdiv annotated with `!fpmath !0` and no fast-math flags.
; NOTE(review): !0 is defined outside this chunk; going by the function name
; it is presumably a 2.5 ulp accuracy bound - confirm against the metadata.
; Every target is still expected to emit, per element, the full
; v_div_scale_f64 / v_rcp_f64 / v_fma_f64 / v_div_fmas_f64 / v_div_fixup_f64
; sequence.
; The GFX6 checks build VCC for v_div_fmas_f64 from v_cmp_eq_u32 + s_xor_b64,
; whereas the GFX8, GFX9 and GFX10 checks take it from the scalar result of
; v_div_scale_f64 (written to vcc directly, or copied in with s_mov).
712define <2 x double> @v_fdiv_v2f64_ulp25(<2 x double> %a, <2 x double> %b) {
713; GFX6-LABEL: v_fdiv_v2f64_ulp25:
714; GFX6:       ; %bb.0:
715; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
716; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
717; GFX6-NEXT:    v_div_scale_f64 v[14:15], s[4:5], v[6:7], v[6:7], v[2:3]
718; GFX6-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
719; GFX6-NEXT:    v_div_scale_f64 v[18:19], s[4:5], v[0:1], v[4:5], v[0:1]
720; GFX6-NEXT:    v_rcp_f64_e32 v[16:17], v[14:15]
721; GFX6-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0
722; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v19
723; GFX6-NEXT:    v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11]
724; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v9
725; GFX6-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0
726; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
727; GFX6-NEXT:    v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11]
728; GFX6-NEXT:    v_fma_f64 v[12:13], -v[14:15], v[16:17], 1.0
729; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v15
730; GFX6-NEXT:    v_fma_f64 v[12:13], v[16:17], v[12:13], v[16:17]
731; GFX6-NEXT:    v_mul_f64 v[16:17], v[18:19], v[10:11]
732; GFX6-NEXT:    v_fma_f64 v[18:19], -v[8:9], v[16:17], v[18:19]
733; GFX6-NEXT:    v_fma_f64 v[8:9], -v[14:15], v[12:13], 1.0
734; GFX6-NEXT:    v_div_fmas_f64 v[10:11], v[18:19], v[10:11], v[16:17]
735; GFX6-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
736; GFX6-NEXT:    v_div_scale_f64 v[12:13], s[6:7], v[2:3], v[6:7], v[2:3]
737; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[10:11], v[4:5], v[0:1]
738; GFX6-NEXT:    v_mul_f64 v[16:17], v[12:13], v[8:9]
739; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v13
740; GFX6-NEXT:    v_fma_f64 v[18:19], -v[14:15], v[16:17], v[12:13]
741; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
742; GFX6-NEXT:    s_nop 1
743; GFX6-NEXT:    v_div_fmas_f64 v[8:9], v[18:19], v[8:9], v[16:17]
744; GFX6-NEXT:    v_div_fixup_f64 v[2:3], v[8:9], v[6:7], v[2:3]
745; GFX6-NEXT:    s_setpc_b64 s[30:31]
746;
747; GFX8-LABEL: v_fdiv_v2f64_ulp25:
748; GFX8:       ; %bb.0:
749; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
750; GFX8-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
751; GFX8-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3]
752; GFX8-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
753; GFX8-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
754; GFX8-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
755; GFX8-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
756; GFX8-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
757; GFX8-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
758; GFX8-NEXT:    v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1]
759; GFX8-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
760; GFX8-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
761; GFX8-NEXT:    v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0
762; GFX8-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
763; GFX8-NEXT:    v_mul_f64 v[16:17], v[18:19], v[12:13]
764; GFX8-NEXT:    v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19]
765; GFX8-NEXT:    v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3]
766; GFX8-NEXT:    v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17]
767; GFX8-NEXT:    s_mov_b64 vcc, s[4:5]
768; GFX8-NEXT:    v_mul_f64 v[20:21], v[18:19], v[14:15]
769; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
770; GFX8-NEXT:    v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19]
771; GFX8-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21]
772; GFX8-NEXT:    v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
773; GFX8-NEXT:    s_setpc_b64 s[30:31]
774;
775; GFX9-LABEL: v_fdiv_v2f64_ulp25:
776; GFX9:       ; %bb.0:
777; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
778; GFX9-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
779; GFX9-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3]
780; GFX9-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
781; GFX9-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
782; GFX9-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
783; GFX9-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
784; GFX9-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
785; GFX9-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
786; GFX9-NEXT:    v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1]
787; GFX9-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
788; GFX9-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
789; GFX9-NEXT:    v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0
790; GFX9-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
791; GFX9-NEXT:    v_mul_f64 v[16:17], v[18:19], v[12:13]
792; GFX9-NEXT:    v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19]
793; GFX9-NEXT:    v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3]
794; GFX9-NEXT:    v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17]
795; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
796; GFX9-NEXT:    v_mul_f64 v[20:21], v[18:19], v[14:15]
797; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
798; GFX9-NEXT:    v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19]
799; GFX9-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21]
800; GFX9-NEXT:    v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
801; GFX9-NEXT:    s_setpc_b64 s[30:31]
802;
803; GFX10-LABEL: v_fdiv_v2f64_ulp25:
804; GFX10:       ; %bb.0:
805; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
806; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
807; GFX10-NEXT:    v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
808; GFX10-NEXT:    v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
809; GFX10-NEXT:    v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
810; GFX10-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
811; GFX10-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
812; GFX10-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
813; GFX10-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
814; GFX10-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
815; GFX10-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
816; GFX10-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
817; GFX10-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
818; GFX10-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
819; GFX10-NEXT:    v_div_scale_f64 v[16:17], s4, v[2:3], v[6:7], v[2:3]
820; GFX10-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
821; GFX10-NEXT:    v_mul_f64 v[18:19], v[20:21], v[12:13]
822; GFX10-NEXT:    v_mul_f64 v[22:23], v[16:17], v[14:15]
823; GFX10-NEXT:    v_fma_f64 v[8:9], -v[8:9], v[18:19], v[20:21]
824; GFX10-NEXT:    v_fma_f64 v[10:11], -v[10:11], v[22:23], v[16:17]
825; GFX10-NEXT:    v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[18:19]
826; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
827; GFX10-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[22:23]
828; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
829; GFX10-NEXT:    v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
830; GFX10-NEXT:    s_setpc_b64 s[30:31]
831  %fdiv = fdiv <2 x double> %a, %b, !fpmath !0
832  ret <2 x double> %fdiv
833}
834
; Reciprocal of a <2 x double>, written as fdiv of the constant <1.0, 1.0>
; by %x with no fast-math flags.
; All targets are expected to emit the full v_div_scale_f64 / v_rcp_f64 /
; v_fma_f64 / v_div_fmas_f64 / v_div_fixup_f64 expansion per element, with
; the numerator appearing as the inline constant 1.0.
; The GFX6 checks additionally load 0x3ff00000 (the high dword of double 1.0)
; into v18 for the v_cmp_eq_u32 compares that feed VCC.
835define <2 x double> @v_rcp_v2f64(<2 x double> %x) {
836; GFX6-LABEL: v_rcp_v2f64:
837; GFX6:       ; %bb.0:
838; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
839; GFX6-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
840; GFX6-NEXT:    v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0
841; GFX6-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
842; GFX6-NEXT:    v_mov_b32_e32 v18, 0x3ff00000
843; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v18, v11
844; GFX6-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
845; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
846; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
847; GFX6-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
848; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v5
849; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
850; GFX6-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
851; GFX6-NEXT:    v_mul_f64 v[14:15], v[10:11], v[6:7]
852; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
853; GFX6-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11]
854; GFX6-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
855; GFX6-NEXT:    v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15]
856; GFX6-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
857; GFX6-NEXT:    v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0
858; GFX6-NEXT:    v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0
859; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
860; GFX6-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13]
861; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v18, v17
862; GFX6-NEXT:    v_mul_f64 v[12:13], v[16:17], v[4:5]
863; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
864; GFX6-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17]
865; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
866; GFX6-NEXT:    v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13]
867; GFX6-NEXT:    v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0
868; GFX6-NEXT:    s_setpc_b64 s[30:31]
869;
870; GFX8-LABEL: v_rcp_v2f64:
871; GFX8:       ; %bb.0:
872; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
873; GFX8-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
874; GFX8-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
875; GFX8-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
876; GFX8-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
877; GFX8-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
878; GFX8-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
879; GFX8-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
880; GFX8-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
881; GFX8-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
882; GFX8-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
883; GFX8-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
884; GFX8-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
885; GFX8-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
886; GFX8-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
887; GFX8-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
888; GFX8-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
889; GFX8-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
890; GFX8-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
891; GFX8-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
892; GFX8-NEXT:    s_mov_b64 vcc, s[4:5]
893; GFX8-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
894; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
895; GFX8-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
896; GFX8-NEXT:    s_setpc_b64 s[30:31]
897;
898; GFX9-LABEL: v_rcp_v2f64:
899; GFX9:       ; %bb.0:
900; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
901; GFX9-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
902; GFX9-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
903; GFX9-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
904; GFX9-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
905; GFX9-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
906; GFX9-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
907; GFX9-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
908; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
909; GFX9-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
910; GFX9-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
911; GFX9-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
912; GFX9-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
913; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
914; GFX9-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
915; GFX9-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
916; GFX9-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
917; GFX9-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
918; GFX9-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
919; GFX9-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
920; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
921; GFX9-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
922; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
923; GFX9-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
924; GFX9-NEXT:    s_setpc_b64 s[30:31]
925;
926; GFX10-LABEL: v_rcp_v2f64:
927; GFX10:       ; %bb.0:
928; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
929; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
930; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0
931; GFX10-NEXT:    v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0
932; GFX10-NEXT:    v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
933; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
934; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
935; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
936; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
937; GFX10-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
938; GFX10-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
939; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
940; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
941; GFX10-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
942; GFX10-NEXT:    v_div_scale_f64 v[12:13], s4, 1.0, v[2:3], 1.0
943; GFX10-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
944; GFX10-NEXT:    v_mul_f64 v[14:15], v[16:17], v[8:9]
945; GFX10-NEXT:    v_mul_f64 v[18:19], v[12:13], v[10:11]
946; GFX10-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[16:17]
947; GFX10-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[12:13]
948; GFX10-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
949; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
950; GFX10-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
951; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
952; GFX10-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
953; GFX10-NEXT:    s_setpc_b64 s[30:31]
954  %fdiv = fdiv <2 x double> <double 1.0, double 1.0>, %x
955  ret <2 x double> %fdiv
956}
957
; Reciprocal of a <2 x double>, written as fdiv of the constant <1.0, 1.0>
; by %x with only the `arcp` fast-math flag set.
; The checks still expect the full v_div_scale_f64 / v_rcp_f64 / v_fma_f64 /
; v_div_fmas_f64 / v_div_fixup_f64 expansion per element on every target;
; none of them expects a bare v_rcp_f64 result.
; As in the other GFX6 expectations in this group, 0x3ff00000 (the high dword
; of double 1.0) is loaded into v18 for the v_cmp_eq_u32 compares.
958define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) {
959; GFX6-LABEL: v_rcp_v2f64_arcp:
960; GFX6:       ; %bb.0:
961; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
962; GFX6-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
963; GFX6-NEXT:    v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0
964; GFX6-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
965; GFX6-NEXT:    v_mov_b32_e32 v18, 0x3ff00000
966; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v18, v11
967; GFX6-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
968; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
969; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
970; GFX6-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
971; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v5
972; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
973; GFX6-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
974; GFX6-NEXT:    v_mul_f64 v[14:15], v[10:11], v[6:7]
975; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
976; GFX6-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11]
977; GFX6-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
978; GFX6-NEXT:    v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15]
979; GFX6-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
980; GFX6-NEXT:    v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0
981; GFX6-NEXT:    v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0
982; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
983; GFX6-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13]
984; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v18, v17
985; GFX6-NEXT:    v_mul_f64 v[12:13], v[16:17], v[4:5]
986; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
987; GFX6-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17]
988; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
989; GFX6-NEXT:    v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13]
990; GFX6-NEXT:    v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0
991; GFX6-NEXT:    s_setpc_b64 s[30:31]
992;
993; GFX8-LABEL: v_rcp_v2f64_arcp:
994; GFX8:       ; %bb.0:
995; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
996; GFX8-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
997; GFX8-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
998; GFX8-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
999; GFX8-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1000; GFX8-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1001; GFX8-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1002; GFX8-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1003; GFX8-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
1004; GFX8-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
1005; GFX8-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
1006; GFX8-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
1007; GFX8-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
1008; GFX8-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
1009; GFX8-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
1010; GFX8-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
1011; GFX8-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
1012; GFX8-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
1013; GFX8-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
1014; GFX8-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
1015; GFX8-NEXT:    s_mov_b64 vcc, s[4:5]
1016; GFX8-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
1017; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
1018; GFX8-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
1019; GFX8-NEXT:    s_setpc_b64 s[30:31]
1020;
1021; GFX9-LABEL: v_rcp_v2f64_arcp:
1022; GFX9:       ; %bb.0:
1023; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024; GFX9-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
1025; GFX9-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
1026; GFX9-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
1027; GFX9-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1028; GFX9-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1029; GFX9-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1030; GFX9-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1031; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
1032; GFX9-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
1033; GFX9-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
1034; GFX9-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
1035; GFX9-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
1036; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
1037; GFX9-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
1038; GFX9-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
1039; GFX9-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
1040; GFX9-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
1041; GFX9-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
1042; GFX9-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
1043; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
1044; GFX9-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
1045; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
1046; GFX9-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
1047; GFX9-NEXT:    s_setpc_b64 s[30:31]
1048;
1049; GFX10-LABEL: v_rcp_v2f64_arcp:
1050; GFX10:       ; %bb.0:
1051; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1052; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1053; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0
1054; GFX10-NEXT:    v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0
1055; GFX10-NEXT:    v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
1056; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1057; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1058; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1059; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1060; GFX10-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
1061; GFX10-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
1062; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1063; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1064; GFX10-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
1065; GFX10-NEXT:    v_div_scale_f64 v[12:13], s4, 1.0, v[2:3], 1.0
1066; GFX10-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
1067; GFX10-NEXT:    v_mul_f64 v[14:15], v[16:17], v[8:9]
1068; GFX10-NEXT:    v_mul_f64 v[18:19], v[12:13], v[10:11]
1069; GFX10-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[16:17]
1070; GFX10-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[12:13]
1071; GFX10-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
1072; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
1073; GFX10-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
1074; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
1075; GFX10-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
1076; GFX10-NEXT:    s_setpc_b64 s[30:31]
1077  %fdiv = fdiv arcp <2 x double> <double 1.0, double 1.0>, %x
1078  ret <2 x double> %fdiv
1079}
1080
; Reciprocal of a <2 x double>, written as fdiv of the constant <1.0, 1.0>
; by %x with both the `arcp` and `afn` fast-math flags set.
; Both check groups expect, per element, v_rcp_f64 followed by v_fma_f64
; refinement steps and no v_div_scale_f64 / v_div_fmas_f64 / v_div_fixup_f64.
; The expected output still contains a v_mul_f64 by the inline constant 1.0
; for each element, i.e. the checks record that multiply as not folded away.
; The GCN prefix is shared by the tahiti, fiji and gfx900 RUN lines; gfx1010
; is checked on its own and additionally expects s_waitcnt_vscnt on entry.
1081define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
1082; GCN-LABEL: v_rcp_v2f64_arcp_afn:
1083; GCN:       ; %bb.0:
1084; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1085; GCN-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
1086; GCN-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
1087; GCN-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
1088; GCN-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
1089; GCN-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
1090; GCN-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
1091; GCN-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
1092; GCN-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
1093; GCN-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
1094; GCN-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
1095; GCN-NEXT:    v_mul_f64 v[8:9], 1.0, v[4:5]
1096; GCN-NEXT:    v_mul_f64 v[10:11], 1.0, v[6:7]
1097; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
1098; GCN-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
1099; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
1100; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
1101; GCN-NEXT:    s_setpc_b64 s[30:31]
1102;
1103; GFX10-LABEL: v_rcp_v2f64_arcp_afn:
1104; GFX10:       ; %bb.0:
1105; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1106; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1107; GFX10-NEXT:    v_rcp_f64_e32 v[4:5], v[0:1]
1108; GFX10-NEXT:    v_rcp_f64_e32 v[6:7], v[2:3]
1109; GFX10-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
1110; GFX10-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
1111; GFX10-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
1112; GFX10-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
1113; GFX10-NEXT:    v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0
1114; GFX10-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
1115; GFX10-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
1116; GFX10-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
1117; GFX10-NEXT:    v_mul_f64 v[8:9], 1.0, v[4:5]
1118; GFX10-NEXT:    v_mul_f64 v[10:11], 1.0, v[6:7]
1119; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
1120; GFX10-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
1121; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
1122; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
1123; GFX10-NEXT:    s_setpc_b64 s[30:31]
1124  %fdiv = fdiv arcp afn <2 x double> <double 1.0, double 1.0>, %x
1125  ret <2 x double> %fdiv
1126}
1127
1128define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) {
1129; GFX6-LABEL: v_rcp_v2f64_ulp25:
1130; GFX6:       ; %bb.0:
1131; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1132; GFX6-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
1133; GFX6-NEXT:    v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0
1134; GFX6-NEXT:    v_rcp_f64_e32 v[6:7], v[4:5]
1135; GFX6-NEXT:    v_mov_b32_e32 v18, 0x3ff00000
1136; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v18, v11
1137; GFX6-NEXT:    v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0
1138; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7]
1139; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0
1140; GFX6-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0
1141; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v1, v5
1142; GFX6-NEXT:    v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7]
1143; GFX6-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
1144; GFX6-NEXT:    v_mul_f64 v[14:15], v[10:11], v[6:7]
1145; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
1146; GFX6-NEXT:    v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11]
1147; GFX6-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
1148; GFX6-NEXT:    v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15]
1149; GFX6-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
1150; GFX6-NEXT:    v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0
1151; GFX6-NEXT:    v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0
1152; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v3, v9
1153; GFX6-NEXT:    v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13]
1154; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v18, v17
1155; GFX6-NEXT:    v_mul_f64 v[12:13], v[16:17], v[4:5]
1156; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
1157; GFX6-NEXT:    v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17]
1158; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0
1159; GFX6-NEXT:    v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13]
1160; GFX6-NEXT:    v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0
1161; GFX6-NEXT:    s_setpc_b64 s[30:31]
1162;
1163; GFX8-LABEL: v_rcp_v2f64_ulp25:
1164; GFX8:       ; %bb.0:
1165; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1166; GFX8-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
1167; GFX8-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
1168; GFX8-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
1169; GFX8-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1170; GFX8-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1171; GFX8-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1172; GFX8-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1173; GFX8-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
1174; GFX8-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
1175; GFX8-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
1176; GFX8-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
1177; GFX8-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
1178; GFX8-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
1179; GFX8-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
1180; GFX8-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
1181; GFX8-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
1182; GFX8-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
1183; GFX8-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
1184; GFX8-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
1185; GFX8-NEXT:    s_mov_b64 vcc, s[4:5]
1186; GFX8-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
1187; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
1188; GFX8-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
1189; GFX8-NEXT:    s_setpc_b64 s[30:31]
1190;
1191; GFX9-LABEL: v_rcp_v2f64_ulp25:
1192; GFX9:       ; %bb.0:
1193; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1194; GFX9-NEXT:    v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0
1195; GFX9-NEXT:    v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0
1196; GFX9-NEXT:    v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0
1197; GFX9-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1198; GFX9-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1199; GFX9-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1200; GFX9-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1201; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
1202; GFX9-NEXT:    v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0
1203; GFX9-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
1204; GFX9-NEXT:    v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0
1205; GFX9-NEXT:    v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0
1206; GFX9-NEXT:    v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9]
1207; GFX9-NEXT:    v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11]
1208; GFX9-NEXT:    v_mul_f64 v[14:15], v[12:13], v[8:9]
1209; GFX9-NEXT:    v_mul_f64 v[18:19], v[16:17], v[10:11]
1210; GFX9-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13]
1211; GFX9-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17]
1212; GFX9-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
1213; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
1214; GFX9-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
1215; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
1216; GFX9-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
1217; GFX9-NEXT:    s_setpc_b64 s[30:31]
1218;
1219; GFX10-LABEL: v_rcp_v2f64_ulp25:
1220; GFX10:       ; %bb.0:
1221; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1222; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1223; GFX10-NEXT:    v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0
1224; GFX10-NEXT:    v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0
1225; GFX10-NEXT:    v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0
1226; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1227; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1228; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1229; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1230; GFX10-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
1231; GFX10-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
1232; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1233; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1234; GFX10-NEXT:    v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9]
1235; GFX10-NEXT:    v_div_scale_f64 v[12:13], s4, 1.0, v[2:3], 1.0
1236; GFX10-NEXT:    v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11]
1237; GFX10-NEXT:    v_mul_f64 v[14:15], v[16:17], v[8:9]
1238; GFX10-NEXT:    v_mul_f64 v[18:19], v[12:13], v[10:11]
1239; GFX10-NEXT:    v_fma_f64 v[4:5], -v[4:5], v[14:15], v[16:17]
1240; GFX10-NEXT:    v_fma_f64 v[6:7], -v[6:7], v[18:19], v[12:13]
1241; GFX10-NEXT:    v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15]
1242; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
1243; GFX10-NEXT:    v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19]
1244; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
1245; GFX10-NEXT:    v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
1246; GFX10-NEXT:    s_setpc_b64 s[30:31]
1247  %fdiv = fdiv <2 x double> <double 1.0, double 1.0>, %x, !fpmath !0
1248  ret <2 x double> %fdiv
1249}
1250
; <2 x double> fdiv carrying the `afn` fast-math flag and the !fpmath !0
; accuracy metadata.
; For each of the two lanes the assertions below expect a v_rcp_f64 result
; refined by two steps of two v_fma_f64 each, then a v_mul_f64 and a final
; pair of v_fma_f64. No v_div_scale_f64, v_div_fmas_f64 or v_div_fixup_f64
; is expected under either check prefix.
; The GFX10 sequence matches the GCN one apart from the additional
; s_waitcnt_vscnt at function entry.
1251define <2 x double> @v_fdiv_v2f64_afn_ulp25(<2 x double> %a, <2 x double> %b) {
1252; GCN-LABEL: v_fdiv_v2f64_afn_ulp25:
1253; GCN:       ; %bb.0:
1254; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1255; GCN-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1256; GCN-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1257; GCN-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1258; GCN-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1259; GCN-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
1260; GCN-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
1261; GCN-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1262; GCN-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1263; GCN-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
1264; GCN-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
1265; GCN-NEXT:    v_mul_f64 v[12:13], v[0:1], v[8:9]
1266; GCN-NEXT:    v_mul_f64 v[14:15], v[2:3], v[10:11]
1267; GCN-NEXT:    v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
1268; GCN-NEXT:    v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
1269; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
1270; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
1271; GCN-NEXT:    s_setpc_b64 s[30:31]
1272;
1273; GFX10-LABEL: v_fdiv_v2f64_afn_ulp25:
1274; GFX10:       ; %bb.0:
1275; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1276; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1277; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1278; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1279; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1280; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1281; GFX10-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
1282; GFX10-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
1283; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1284; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1285; GFX10-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
1286; GFX10-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
1287; GFX10-NEXT:    v_mul_f64 v[12:13], v[0:1], v[8:9]
1288; GFX10-NEXT:    v_mul_f64 v[14:15], v[2:3], v[10:11]
1289; GFX10-NEXT:    v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
1290; GFX10-NEXT:    v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
1291; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
1292; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
1293; GFX10-NEXT:    s_setpc_b64 s[30:31]
1294  %fdiv = fdiv afn <2 x double> %a, %b, !fpmath !0
1295  ret <2 x double> %fdiv
1296}
1297
; <2 x double> fdiv carrying only the `arcp` fast-math flag and the
; !fpmath !0 accuracy metadata.
; Every per-target check block below still expects the long expansion for both
; lanes, built from v_div_scale_f64, v_rcp_f64, v_fma_f64, v_mul_f64,
; v_div_fmas_f64 and v_div_fixup_f64.
; In the GFX6 block vcc is computed with v_cmp_eq_u32 and s_xor_b64 ahead of
; each v_div_fmas_f64, whereas the GFX8, GFX9 and GFX10 blocks take it from a
; v_div_scale_f64 condition output (written to vcc directly, or copied from an
; SGPR with s_mov).
; NOTE(review): the GFX6 difference is presumably a target-specific
; workaround - confirm against the AMDGPU fdiv lowering.
1298define <2 x double> @v_fdiv_v2f64_arcp_ulp25(<2 x double> %a, <2 x double> %b) {
1299; GFX6-LABEL: v_fdiv_v2f64_arcp_ulp25:
1300; GFX6:       ; %bb.0:
1301; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1302; GFX6-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
1303; GFX6-NEXT:    v_div_scale_f64 v[14:15], s[4:5], v[6:7], v[6:7], v[2:3]
1304; GFX6-NEXT:    v_rcp_f64_e32 v[10:11], v[8:9]
1305; GFX6-NEXT:    v_div_scale_f64 v[18:19], s[4:5], v[0:1], v[4:5], v[0:1]
1306; GFX6-NEXT:    v_rcp_f64_e32 v[16:17], v[14:15]
1307; GFX6-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0
1308; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v19
1309; GFX6-NEXT:    v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11]
1310; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v5, v9
1311; GFX6-NEXT:    v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0
1312; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
1313; GFX6-NEXT:    v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11]
1314; GFX6-NEXT:    v_fma_f64 v[12:13], -v[14:15], v[16:17], 1.0
1315; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], v7, v15
1316; GFX6-NEXT:    v_fma_f64 v[12:13], v[16:17], v[12:13], v[16:17]
1317; GFX6-NEXT:    v_mul_f64 v[16:17], v[18:19], v[10:11]
1318; GFX6-NEXT:    v_fma_f64 v[18:19], -v[8:9], v[16:17], v[18:19]
1319; GFX6-NEXT:    v_fma_f64 v[8:9], -v[14:15], v[12:13], 1.0
1320; GFX6-NEXT:    v_div_fmas_f64 v[10:11], v[18:19], v[10:11], v[16:17]
1321; GFX6-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
1322; GFX6-NEXT:    v_div_scale_f64 v[12:13], s[6:7], v[2:3], v[6:7], v[2:3]
1323; GFX6-NEXT:    v_div_fixup_f64 v[0:1], v[10:11], v[4:5], v[0:1]
1324; GFX6-NEXT:    v_mul_f64 v[16:17], v[12:13], v[8:9]
1325; GFX6-NEXT:    v_cmp_eq_u32_e32 vcc, v3, v13
1326; GFX6-NEXT:    v_fma_f64 v[18:19], -v[14:15], v[16:17], v[12:13]
1327; GFX6-NEXT:    s_xor_b64 vcc, vcc, s[4:5]
1328; GFX6-NEXT:    s_nop 1
1329; GFX6-NEXT:    v_div_fmas_f64 v[8:9], v[18:19], v[8:9], v[16:17]
1330; GFX6-NEXT:    v_div_fixup_f64 v[2:3], v[8:9], v[6:7], v[2:3]
1331; GFX6-NEXT:    s_setpc_b64 s[30:31]
1332;
1333; GFX8-LABEL: v_fdiv_v2f64_arcp_ulp25:
1334; GFX8:       ; %bb.0:
1335; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1336; GFX8-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
1337; GFX8-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3]
1338; GFX8-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
1339; GFX8-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
1340; GFX8-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
1341; GFX8-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
1342; GFX8-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
1343; GFX8-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
1344; GFX8-NEXT:    v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1]
1345; GFX8-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
1346; GFX8-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
1347; GFX8-NEXT:    v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0
1348; GFX8-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
1349; GFX8-NEXT:    v_mul_f64 v[16:17], v[18:19], v[12:13]
1350; GFX8-NEXT:    v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19]
1351; GFX8-NEXT:    v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3]
1352; GFX8-NEXT:    v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17]
1353; GFX8-NEXT:    s_mov_b64 vcc, s[4:5]
1354; GFX8-NEXT:    v_mul_f64 v[20:21], v[18:19], v[14:15]
1355; GFX8-NEXT:    v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
1356; GFX8-NEXT:    v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19]
1357; GFX8-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21]
1358; GFX8-NEXT:    v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
1359; GFX8-NEXT:    s_setpc_b64 s[30:31]
1360;
1361; GFX9-LABEL: v_fdiv_v2f64_arcp_ulp25:
1362; GFX9:       ; %bb.0:
1363; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1364; GFX9-NEXT:    v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
1365; GFX9-NEXT:    v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3]
1366; GFX9-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
1367; GFX9-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
1368; GFX9-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
1369; GFX9-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
1370; GFX9-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
1371; GFX9-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
1372; GFX9-NEXT:    v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1]
1373; GFX9-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
1374; GFX9-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
1375; GFX9-NEXT:    v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0
1376; GFX9-NEXT:    v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
1377; GFX9-NEXT:    v_mul_f64 v[16:17], v[18:19], v[12:13]
1378; GFX9-NEXT:    v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19]
1379; GFX9-NEXT:    v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3]
1380; GFX9-NEXT:    v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17]
1381; GFX9-NEXT:    s_mov_b64 vcc, s[4:5]
1382; GFX9-NEXT:    v_mul_f64 v[20:21], v[18:19], v[14:15]
1383; GFX9-NEXT:    v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
1384; GFX9-NEXT:    v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19]
1385; GFX9-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21]
1386; GFX9-NEXT:    v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
1387; GFX9-NEXT:    s_setpc_b64 s[30:31]
1388;
1389; GFX10-LABEL: v_fdiv_v2f64_arcp_ulp25:
1390; GFX10:       ; %bb.0:
1391; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1392; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1393; GFX10-NEXT:    v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
1394; GFX10-NEXT:    v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
1395; GFX10-NEXT:    v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
1396; GFX10-NEXT:    v_rcp_f64_e32 v[12:13], v[8:9]
1397; GFX10-NEXT:    v_rcp_f64_e32 v[14:15], v[10:11]
1398; GFX10-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
1399; GFX10-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
1400; GFX10-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
1401; GFX10-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
1402; GFX10-NEXT:    v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
1403; GFX10-NEXT:    v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
1404; GFX10-NEXT:    v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
1405; GFX10-NEXT:    v_div_scale_f64 v[16:17], s4, v[2:3], v[6:7], v[2:3]
1406; GFX10-NEXT:    v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
1407; GFX10-NEXT:    v_mul_f64 v[18:19], v[20:21], v[12:13]
1408; GFX10-NEXT:    v_mul_f64 v[22:23], v[16:17], v[14:15]
1409; GFX10-NEXT:    v_fma_f64 v[8:9], -v[8:9], v[18:19], v[20:21]
1410; GFX10-NEXT:    v_fma_f64 v[10:11], -v[10:11], v[22:23], v[16:17]
1411; GFX10-NEXT:    v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[18:19]
1412; GFX10-NEXT:    s_mov_b32 vcc_lo, s4
1413; GFX10-NEXT:    v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[22:23]
1414; GFX10-NEXT:    v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
1415; GFX10-NEXT:    v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
1416; GFX10-NEXT:    s_setpc_b64 s[30:31]
1417  %fdiv = fdiv arcp <2 x double> %a, %b, !fpmath !0
1418  ret <2 x double> %fdiv
1419}
1420
; <2 x double> fdiv carrying both the `afn` and `arcp` fast-math flags plus
; the !fpmath !0 accuracy metadata.
; The expected instruction sequences are the same as those asserted for
; v_fdiv_v2f64_afn_ulp25 in this file. Each lane uses v_rcp_f64, v_fma_f64
; refinement, v_mul_f64 and a final pair of v_fma_f64, with no
; v_div_scale_f64, v_div_fmas_f64 or v_div_fixup_f64.
; The GFX10 sequence matches the GCN one apart from the additional
; s_waitcnt_vscnt at function entry.
1421define <2 x double> @v_fdiv_v2f64_arcp_afn_ulp25(<2 x double> %a, <2 x double> %b) {
1422; GCN-LABEL: v_fdiv_v2f64_arcp_afn_ulp25:
1423; GCN:       ; %bb.0:
1424; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1425; GCN-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1426; GCN-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1427; GCN-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1428; GCN-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1429; GCN-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
1430; GCN-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
1431; GCN-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1432; GCN-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1433; GCN-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
1434; GCN-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
1435; GCN-NEXT:    v_mul_f64 v[12:13], v[0:1], v[8:9]
1436; GCN-NEXT:    v_mul_f64 v[14:15], v[2:3], v[10:11]
1437; GCN-NEXT:    v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
1438; GCN-NEXT:    v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
1439; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
1440; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
1441; GCN-NEXT:    s_setpc_b64 s[30:31]
1442;
1443; GFX10-LABEL: v_fdiv_v2f64_arcp_afn_ulp25:
1444; GFX10:       ; %bb.0:
1445; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1446; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1447; GFX10-NEXT:    v_rcp_f64_e32 v[8:9], v[4:5]
1448; GFX10-NEXT:    v_rcp_f64_e32 v[10:11], v[6:7]
1449; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1450; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1451; GFX10-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
1452; GFX10-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
1453; GFX10-NEXT:    v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
1454; GFX10-NEXT:    v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
1455; GFX10-NEXT:    v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
1456; GFX10-NEXT:    v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
1457; GFX10-NEXT:    v_mul_f64 v[12:13], v[0:1], v[8:9]
1458; GFX10-NEXT:    v_mul_f64 v[14:15], v[2:3], v[10:11]
1459; GFX10-NEXT:    v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
1460; GFX10-NEXT:    v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
1461; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
1462; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
1463; GFX10-NEXT:    s_setpc_b64 s[30:31]
1464  %fdiv = fdiv afn arcp <2 x double> %a, %b, !fpmath !0
1465  ret <2 x double> %fdiv
1466}
1467
; Metadata node referenced as `!fpmath !0` by the fdiv instructions in this
; file. Per the LLVM LangRef, the float operand of !fpmath is the maximum
; permitted relative error of the result in ULPs, so this node requests
; 2.5 ULP accuracy (the "ulp25" suffix in the test names).
1468!0 = !{float 2.500000e+00}
1469