1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; Denormal mode shouldn't matter for f16, check with and without flushing.
3; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6,GFX6-IEEE %s
4; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6,GFX6-FLUSH %s
5
6; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
7; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
8
9; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
10; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
11
12; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
13; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
14
; Baseline case: a scalar half fdiv with no fast-math flags and no metadata.
; Expected lowering according to the autogenerated checks in the body:
;  - tahiti runs (GFX6 prefixes): both operands are converted to f32, divided
;    with the v_div_scale / v_rcp / v_fma / v_div_fmas / v_div_fixup sequence,
;    and the quotient is converted back to f16. The preserve-sign run also
;    expects s_setreg_imm32_b32 writes to HW_REG_MODE around the FMA chain and
;    one more before the final v_cvt_f16_f32.
;  - fiji / gfx900 runs (shared GFX89 prefix) and the gfx1010 run: v_rcp_f32
;    and v_mul_f32 on the extended operands, conversion back to f16, then
;    v_div_fixup_f16 with the original operands.
15define half @v_fdiv_f16(half %a, half %b) {
16; GFX6-IEEE-LABEL: v_fdiv_f16:
17; GFX6-IEEE:       ; %bb.0:
18; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
20; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
21; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
22; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
23; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
24; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
25; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
26; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
27; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
28; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
29; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
30; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
31; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
32; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
33; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
34;
35; GFX6-FLUSH-LABEL: v_fdiv_f16:
36; GFX6-FLUSH:       ; %bb.0:
37; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
39; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
40; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
41; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
42; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
43; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
44; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
45; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
46; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
47; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
48; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
49; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
50; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
51; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
52; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
53; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
54; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
55; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
56;
57; GFX89-LABEL: v_fdiv_f16:
58; GFX89:       ; %bb.0:
59; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v1
61; GFX89-NEXT:    v_cvt_f32_f16_e32 v3, v0
62; GFX89-NEXT:    v_rcp_f32_e32 v2, v2
63; GFX89-NEXT:    v_mul_f32_e32 v2, v3, v2
64; GFX89-NEXT:    v_cvt_f16_f32_e32 v2, v2
65; GFX89-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
66; GFX89-NEXT:    s_setpc_b64 s[30:31]
67;
68; GFX10-LABEL: v_fdiv_f16:
69; GFX10:       ; %bb.0:
70; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
71; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
72; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
73; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v0
74; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
75; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
76; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
77; GFX10-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
78; GFX10-NEXT:    s_setpc_b64 s[30:31]
79  %fdiv = fdiv half %a, %b
80  ret half %fdiv
81}
82
; Scalar half fdiv carrying the afn fast-math flag.
; Every target is expected to use a plain reciprocal-and-multiply with no
; scale/fixup steps:
;  - tahiti: v_rcp_f32 + v_mul_f32 between the f16<->f32 conversions. Both
;    denormal-mode runs share the common GFX6 prefix here, i.e. their output
;    is expected to be identical.
;  - fiji / gfx900 / gfx1010: v_rcp_f16 followed by v_mul_f16.
83define half @v_fdiv_f16_afn(half %a, half %b) {
84; GFX6-LABEL: v_fdiv_f16_afn:
85; GFX6:       ; %bb.0:
86; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
87; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
88; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
89; GFX6-NEXT:    v_rcp_f32_e32 v1, v1
90; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v1
91; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
92; GFX6-NEXT:    s_setpc_b64 s[30:31]
93;
94; GFX89-LABEL: v_fdiv_f16_afn:
95; GFX89:       ; %bb.0:
96; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
97; GFX89-NEXT:    v_rcp_f16_e32 v1, v1
98; GFX89-NEXT:    v_mul_f16_e32 v0, v0, v1
99; GFX89-NEXT:    s_setpc_b64 s[30:31]
100;
101; GFX10-LABEL: v_fdiv_f16_afn:
102; GFX10:       ; %bb.0:
103; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
104; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
105; GFX10-NEXT:    v_rcp_f16_e32 v1, v1
106; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
107; GFX10-NEXT:    s_setpc_b64 s[30:31]
108  %fdiv = fdiv afn half %a, %b
109  ret half %fdiv
110}
111
; Scalar half fdiv with !fpmath !0 attached and no fast-math flags.
; NOTE(review): !0 is defined outside this part of the file; the _ulp25 suffix
; suggests a 2.5 ulp accuracy bound - confirm against the metadata node.
; The checks show that the metadata alone does not shorten the expansion for a
; general numerator: tahiti still expects the full f32 v_div_scale / v_fma /
; v_div_fmas / v_div_fixup sequence (with HW_REG_MODE writes in the
; preserve-sign run), and the newer targets still expect v_rcp_f32 + v_mul_f32
; followed by v_div_fixup_f16.
112define half @v_fdiv_f16_ulp25(half %a, half %b) {
113; GFX6-IEEE-LABEL: v_fdiv_f16_ulp25:
114; GFX6-IEEE:       ; %bb.0:
115; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
117; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
118; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
119; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
120; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
121; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
122; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
123; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
124; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
125; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
126; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
127; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
128; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
129; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
130; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
131;
132; GFX6-FLUSH-LABEL: v_fdiv_f16_ulp25:
133; GFX6-FLUSH:       ; %bb.0:
134; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
135; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
136; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
137; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
138; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
139; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
140; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
141; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
142; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
143; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
144; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
145; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
146; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
147; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
148; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
149; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
150; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
151; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
152; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
153;
154; GFX89-LABEL: v_fdiv_f16_ulp25:
155; GFX89:       ; %bb.0:
156; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
157; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v1
158; GFX89-NEXT:    v_cvt_f32_f16_e32 v3, v0
159; GFX89-NEXT:    v_rcp_f32_e32 v2, v2
160; GFX89-NEXT:    v_mul_f32_e32 v2, v3, v2
161; GFX89-NEXT:    v_cvt_f16_f32_e32 v2, v2
162; GFX89-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
163; GFX89-NEXT:    s_setpc_b64 s[30:31]
164;
165; GFX10-LABEL: v_fdiv_f16_ulp25:
166; GFX10:       ; %bb.0:
167; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
169; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
170; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v0
171; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
172; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
173; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
174; GFX10-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
175; GFX10-NEXT:    s_setpc_b64 s[30:31]
176  %fdiv = fdiv half %a, %b, !fpmath !0
177  ret half %fdiv
178}
179
; Reciprocal spelled as "fdiv half 1.0, %x" with no flags or metadata.
; The checks expect a full division rather than a bare reciprocal:
;  - tahiti: the numerator is produced by v_cvt_f32_f16 of the inline constant
;    1.0, then the f32 v_div_scale / v_fma / v_div_fmas / v_div_fixup sequence
;    runs (with HW_REG_MODE writes in the preserve-sign run).
;  - fiji / gfx900 / gfx1010: v_rcp_f32 and v_mul_f32 on the extended values,
;    then v_div_fixup_f16 with 1.0 as the numerator operand.
180define half @v_rcp_f16(half %x) {
181; GFX6-IEEE-LABEL: v_rcp_f16:
182; GFX6-IEEE:       ; %bb.0:
183; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
184; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
185; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
186; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
187; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
188; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
189; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
190; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
191; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
192; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
193; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
194; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
195; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
196; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
197; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
198; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
199;
200; GFX6-FLUSH-LABEL: v_rcp_f16:
201; GFX6-FLUSH:       ; %bb.0:
202; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
204; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
205; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
206; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
207; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
208; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
209; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
210; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
211; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
212; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
213; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
214; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
215; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
216; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
217; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
218; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
219; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
220; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
221;
222; GFX89-LABEL: v_rcp_f16:
223; GFX89:       ; %bb.0:
224; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
225; GFX89-NEXT:    v_cvt_f32_f16_e32 v1, v0
226; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
227; GFX89-NEXT:    v_rcp_f32_e32 v1, v1
228; GFX89-NEXT:    v_mul_f32_e32 v1, v2, v1
229; GFX89-NEXT:    v_cvt_f16_f32_e32 v1, v1
230; GFX89-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
231; GFX89-NEXT:    s_setpc_b64 s[30:31]
232;
233; GFX10-LABEL: v_rcp_f16:
234; GFX10:       ; %bb.0:
235; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
237; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v0
238; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
239; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
240; GFX10-NEXT:    v_mul_f32_e32 v1, v2, v1
241; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
242; GFX10-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
243; GFX10-NEXT:    s_setpc_b64 s[30:31]
244  %fdiv = fdiv half 1.0, %x
245  ret half %fdiv
246}
247
; Reciprocal "fdiv half 1.0, %x" carrying only the arcp fast-math flag.
; Per the checks, arcp on its own does not simplify the expansion: tahiti
; still expects the complete f32 v_div_scale / v_fma / v_div_fmas /
; v_div_fixup sequence (plus HW_REG_MODE writes in the preserve-sign run), and
; the newer targets still expect v_rcp_f32 + v_mul_f32 + v_div_fixup_f16 with
; 1.0 as the numerator operand.
248define half @v_rcp_f16_arcp(half %x) {
249; GFX6-IEEE-LABEL: v_rcp_f16_arcp:
250; GFX6-IEEE:       ; %bb.0:
251; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
252; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
253; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
254; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
255; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
256; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
257; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
258; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
259; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
260; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
261; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
262; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
263; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
264; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
265; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
266; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
267;
268; GFX6-FLUSH-LABEL: v_rcp_f16_arcp:
269; GFX6-FLUSH:       ; %bb.0:
270; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
271; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
272; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
273; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
274; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
275; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
276; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
277; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
278; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
279; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
280; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
281; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
282; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
283; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
284; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
285; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
286; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
287; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
288; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
289;
290; GFX89-LABEL: v_rcp_f16_arcp:
291; GFX89:       ; %bb.0:
292; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
293; GFX89-NEXT:    v_cvt_f32_f16_e32 v1, v0
294; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
295; GFX89-NEXT:    v_rcp_f32_e32 v1, v1
296; GFX89-NEXT:    v_mul_f32_e32 v1, v2, v1
297; GFX89-NEXT:    v_cvt_f16_f32_e32 v1, v1
298; GFX89-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
299; GFX89-NEXT:    s_setpc_b64 s[30:31]
300;
301; GFX10-LABEL: v_rcp_f16_arcp:
302; GFX10:       ; %bb.0:
303; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
304; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
305; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v0
306; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
307; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
308; GFX10-NEXT:    v_mul_f32_e32 v1, v2, v1
309; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
310; GFX10-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
311; GFX10-NEXT:    s_setpc_b64 s[30:31]
312  %fdiv = fdiv arcp half 1.0, %x
313  ret half %fdiv
314}
315
; Reciprocal "fdiv half 1.0, %x" carrying both arcp and afn.
; With both flags the checks expect the cheap forms:
;  - tahiti (both denormal modes share the common GFX6 prefix): v_rcp_f32 of
;    the extended operand, multiplied by an f32 1.0 obtained through
;    v_cvt_f32_f16, then converted back to f16.
;  - fiji / gfx900 / gfx1010: a single v_rcp_f16.
316define half @v_rcp_f16_arcp_afn(half %x) {
317; GFX6-LABEL: v_rcp_f16_arcp_afn:
318; GFX6:       ; %bb.0:
319; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
320; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
321; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
322; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
323; GFX6-NEXT:    v_mul_f32_e32 v0, v1, v0
324; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
325; GFX6-NEXT:    s_setpc_b64 s[30:31]
326;
327; GFX89-LABEL: v_rcp_f16_arcp_afn:
328; GFX89:       ; %bb.0:
329; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330; GFX89-NEXT:    v_rcp_f16_e32 v0, v0
331; GFX89-NEXT:    s_setpc_b64 s[30:31]
332;
333; GFX10-LABEL: v_rcp_f16_arcp_afn:
334; GFX10:       ; %bb.0:
335; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
336; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
337; GFX10-NEXT:    v_rcp_f16_e32 v0, v0
338; GFX10-NEXT:    s_setpc_b64 s[30:31]
339  %fdiv = fdiv arcp afn half 1.0, %x
340  ret half %fdiv
341}
342
; Reciprocal "fdiv half 1.0, %x" with !fpmath !0 and no fast-math flags.
; NOTE(review): !0 is defined outside this part of the file; the _ulp25 suffix
; suggests a 2.5 ulp accuracy bound - confirm against the metadata node.
; Expected output per the checks:
;  - tahiti: the full f32 v_div_scale / v_fma / v_div_fmas / v_div_fixup
;    sequence (with HW_REG_MODE writes in the preserve-sign run).
;  - fiji / gfx900 / gfx1010: a single v_rcp_f16, unlike the general-numerator
;    case with the same metadata.
343define half @v_rcp_f16_ulp25(half %x) {
344; GFX6-IEEE-LABEL: v_rcp_f16_ulp25:
345; GFX6-IEEE:       ; %bb.0:
346; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
347; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
348; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
349; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
350; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
351; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
352; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
353; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
354; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
355; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
356; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
357; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
358; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
359; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
360; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
361; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
362;
363; GFX6-FLUSH-LABEL: v_rcp_f16_ulp25:
364; GFX6-FLUSH:       ; %bb.0:
365; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
366; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
367; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
368; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
369; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
370; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
371; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
372; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
373; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
374; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
375; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
376; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
377; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
378; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
379; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
380; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
381; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
382; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
383; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
384;
385; GFX89-LABEL: v_rcp_f16_ulp25:
386; GFX89:       ; %bb.0:
387; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
388; GFX89-NEXT:    v_rcp_f16_e32 v0, v0
389; GFX89-NEXT:    s_setpc_b64 s[30:31]
390;
391; GFX10-LABEL: v_rcp_f16_ulp25:
392; GFX10:       ; %bb.0:
393; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
394; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
395; GFX10-NEXT:    v_rcp_f16_e32 v0, v0
396; GFX10-NEXT:    s_setpc_b64 s[30:31]
397  %fdiv = fdiv half 1.0, %x, !fpmath !0
398  ret half %fdiv
399}
400
; Scalar half fdiv with both the afn flag and !fpmath !0.
; NOTE(review): !0 is defined outside this part of the file; the _ulp25 suffix
; suggests a 2.5 ulp accuracy bound - confirm against the metadata node.
; The checks expect the same reciprocal-and-multiply shape as a plain afn
; division: v_rcp_f32 + v_mul_f32 between conversions on tahiti (one shared
; GFX6 prefix for both denormal modes), and v_rcp_f16 + v_mul_f16 on
; fiji / gfx900 / gfx1010.
401define half @v_fdiv_f16_afn_ulp25(half %a, half %b) {
402; GFX6-LABEL: v_fdiv_f16_afn_ulp25:
403; GFX6:       ; %bb.0:
404; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
406; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
407; GFX6-NEXT:    v_rcp_f32_e32 v1, v1
408; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v1
409; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
410; GFX6-NEXT:    s_setpc_b64 s[30:31]
411;
412; GFX89-LABEL: v_fdiv_f16_afn_ulp25:
413; GFX89:       ; %bb.0:
414; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415; GFX89-NEXT:    v_rcp_f16_e32 v1, v1
416; GFX89-NEXT:    v_mul_f16_e32 v0, v0, v1
417; GFX89-NEXT:    s_setpc_b64 s[30:31]
418;
419; GFX10-LABEL: v_fdiv_f16_afn_ulp25:
420; GFX10:       ; %bb.0:
421; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
423; GFX10-NEXT:    v_rcp_f16_e32 v1, v1
424; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
425; GFX10-NEXT:    s_setpc_b64 s[30:31]
426  %fdiv = fdiv afn half %a, %b, !fpmath !0
427  ret half %fdiv
428}
429
; Scalar half fdiv with the arcp flag and !fpmath !0.
; NOTE(review): !0 is defined outside this part of the file; the _ulp25 suffix
; suggests a 2.5 ulp accuracy bound - confirm against the metadata node.
; Per the checks, this combination still gets the accurate expansion: the full
; f32 v_div_scale / v_fma / v_div_fmas / v_div_fixup sequence on tahiti (with
; HW_REG_MODE writes in the preserve-sign run), and v_rcp_f32 + v_mul_f32 +
; v_div_fixup_f16 on fiji / gfx900 / gfx1010.
430define half @v_fdiv_f16_arcp_ulp25(half %a, half %b) {
431; GFX6-IEEE-LABEL: v_fdiv_f16_arcp_ulp25:
432; GFX6-IEEE:       ; %bb.0:
433; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
435; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
436; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
437; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
438; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
439; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
440; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
441; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
442; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
443; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
444; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
445; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
446; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
447; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
448; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
449;
450; GFX6-FLUSH-LABEL: v_fdiv_f16_arcp_ulp25:
451; GFX6-FLUSH:       ; %bb.0:
452; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
453; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
454; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
455; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
456; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
457; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
458; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
459; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
460; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
461; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
462; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
463; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
464; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
465; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
466; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
467; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
468; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
469; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
470; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
471;
472; GFX89-LABEL: v_fdiv_f16_arcp_ulp25:
473; GFX89:       ; %bb.0:
474; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
475; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v1
476; GFX89-NEXT:    v_cvt_f32_f16_e32 v3, v0
477; GFX89-NEXT:    v_rcp_f32_e32 v2, v2
478; GFX89-NEXT:    v_mul_f32_e32 v2, v3, v2
479; GFX89-NEXT:    v_cvt_f16_f32_e32 v2, v2
480; GFX89-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
481; GFX89-NEXT:    s_setpc_b64 s[30:31]
482;
483; GFX10-LABEL: v_fdiv_f16_arcp_ulp25:
484; GFX10:       ; %bb.0:
485; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
486; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
487; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
488; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v0
489; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
490; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
491; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
492; GFX10-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
493; GFX10-NEXT:    s_setpc_b64 s[30:31]
494  %fdiv = fdiv arcp half %a, %b, !fpmath !0
495  ret half %fdiv
496}
497
; Vector case: <2 x half> fdiv with no fast-math flags or metadata.
; Expected lowering per the checks:
;  - tahiti: the four half elements are read from v0..v3 and each converted to
;    f32; two independent f32 v_div_scale / v_fma / v_div_fmas / v_div_fixup
;    sequences produce the two lanes. In the preserve-sign run each FMA chain
;    is bracketed by HW_REG_MODE (offset 4, width 2) writes, and only one
;    HW_REG_MODE (offset 2, width 2) write is expected, before the first
;    v_cvt_f16_f32.
;  - fiji, gfx900 and gfx1010 have separate expectations here: the high halves
;    are extracted with a 16-bit right shift, each lane goes through v_rcp_f32
;    + v_mul_f32 + v_div_fixup_f16, and the result is repacked into one
;    register. fiji repacks with SDWA shift/or instructions; gfx900 and
;    gfx1010 use v_lshlrev_b32 plus v_and_or_b32.
498define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) {
499; GFX6-IEEE-LABEL: v_fdiv_v2f16:
500; GFX6-IEEE:       ; %bb.0:
501; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
502; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
503; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v2, v2
504; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
505; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v3, v3
506; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
507; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v5, v4
508; GFX6-IEEE-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
509; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
510; GFX6-IEEE-NEXT:    v_fma_f32 v5, v7, v5, v5
511; GFX6-IEEE-NEXT:    v_mul_f32_e32 v7, v6, v5
512; GFX6-IEEE-NEXT:    v_fma_f32 v8, -v4, v7, v6
513; GFX6-IEEE-NEXT:    v_fma_f32 v7, v8, v5, v7
514; GFX6-IEEE-NEXT:    v_fma_f32 v4, -v4, v7, v6
515; GFX6-IEEE-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
516; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
517; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
518; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
519; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
520; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
521; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
522; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
523; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
524; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v2, v6, v5
525; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
526; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v6, v5
527; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
528; GFX6-IEEE-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
529; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v1, v1
530; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
531;
532; GFX6-FLUSH-LABEL: v_fdiv_v2f16:
533; GFX6-FLUSH:       ; %bb.0:
534; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
535; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
536; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v2, v2
537; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
538; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v5, v4
539; GFX6-FLUSH-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
540; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
541; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
542; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v7, v5, v5
543; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v7, v6, v5
544; GFX6-FLUSH-NEXT:    v_fma_f32 v8, -v4, v7, v6
545; GFX6-FLUSH-NEXT:    v_fma_f32 v7, v8, v5, v7
546; GFX6-FLUSH-NEXT:    v_fma_f32 v4, -v4, v7, v6
547; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
548; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
549; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v3, v3
550; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
551; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
552; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
553; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
554; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
555; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v4, v2
556; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
557; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
558; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
559; GFX6-FLUSH-NEXT:    v_fma_f32 v4, v6, v4, v4
560; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v4
561; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v2, v6, v5
562; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v4, v6
563; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v6, v5
564; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
565; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
566; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
567; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v1, v1
568; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
569;
570; GFX8-LABEL: v_fdiv_v2f16:
571; GFX8:       ; %bb.0:
572; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
573; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
574; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v1
575; GFX8-NEXT:    v_cvt_f32_f16_e32 v5, v4
576; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
577; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v0
578; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
579; GFX8-NEXT:    v_cvt_f32_f16_e32 v7, v6
580; GFX8-NEXT:    v_rcp_f32_e32 v5, v5
581; GFX8-NEXT:    v_mul_f32_e32 v2, v3, v2
582; GFX8-NEXT:    v_cvt_f16_f32_e32 v2, v2
583; GFX8-NEXT:    v_mul_f32_e32 v3, v7, v5
584; GFX8-NEXT:    v_cvt_f16_f32_e32 v3, v3
585; GFX8-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
586; GFX8-NEXT:    v_mov_b32_e32 v2, 16
587; GFX8-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
588; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
589; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
590; GFX8-NEXT:    s_setpc_b64 s[30:31]
591;
592; GFX9-LABEL: v_fdiv_v2f16:
593; GFX9:       ; %bb.0:
594; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
596; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v1
597; GFX9-NEXT:    v_cvt_f32_f16_e32 v5, v4
598; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
599; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v0
600; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
601; GFX9-NEXT:    v_cvt_f32_f16_e32 v7, v6
602; GFX9-NEXT:    v_rcp_f32_e32 v5, v5
603; GFX9-NEXT:    v_mul_f32_e32 v2, v3, v2
604; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
605; GFX9-NEXT:    v_mul_f32_e32 v3, v7, v5
606; GFX9-NEXT:    v_cvt_f16_f32_e32 v3, v3
607; GFX9-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
608; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
609; GFX9-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
610; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
611; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
612; GFX9-NEXT:    s_setpc_b64 s[30:31]
613;
614; GFX10-LABEL: v_fdiv_v2f16:
615; GFX10:       ; %bb.0:
616; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
617; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
618; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
619; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v1
620; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
621; GFX10-NEXT:    v_cvt_f32_f16_e32 v7, v0
622; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v2
623; GFX10-NEXT:    v_rcp_f32_e32 v4, v4
624; GFX10-NEXT:    v_cvt_f32_f16_e32 v6, v5
625; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
626; GFX10-NEXT:    v_mul_f32_e32 v4, v7, v4
627; GFX10-NEXT:    v_mul_f32_e32 v3, v6, v3
628; GFX10-NEXT:    v_cvt_f16_f32_e32 v4, v4
629; GFX10-NEXT:    v_cvt_f16_f32_e32 v3, v3
630; GFX10-NEXT:    v_div_fixup_f16 v0, v4, v1, v0
631; GFX10-NEXT:    v_div_fixup_f16 v2, v3, v2, v5
632; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
633; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
634; GFX10-NEXT:    s_setpc_b64 s[30:31]
635  %fdiv = fdiv <2 x half> %a, %b
636  ret <2 x half> %fdiv
637}
638
; Vector case with the afn fast-math flag on a <2 x half> fdiv.
; Expected lowering per the checks:
;  - tahiti (one shared GFX6 prefix for both denormal modes): each of the four
;    elements in v0..v3 is converted to f32, the divisors go through
;    v_rcp_f32, and each lane is a single v_mul_f32 before converting back.
;  - fiji / gfx900 / gfx1010: v_rcp_f16 and v_mul_f16 for the low half, and
;    SDWA forms of the same instructions selecting WORD_1 for the high half.
;    fiji then repacks with v_lshlrev_b32_sdwa + v_or_b32_sdwa, while gfx900
;    and gfx1010 write the high product with dst_sel WORD_1 and merge it with
;    v_and_or_b32.
639define <2 x half> @v_fdiv_v2f16_afn(<2 x half> %a, <2 x half> %b) {
640; GFX6-LABEL: v_fdiv_v2f16_afn:
641; GFX6:       ; %bb.0:
642; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
643; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
644; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
645; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
646; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
647; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
648; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
649; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
650; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v3
651; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
652; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
653; GFX6-NEXT:    s_setpc_b64 s[30:31]
654;
655; GFX8-LABEL: v_fdiv_v2f16_afn:
656; GFX8:       ; %bb.0:
657; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658; GFX8-NEXT:    v_rcp_f16_e32 v2, v1
659; GFX8-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
660; GFX8-NEXT:    v_mul_f16_e32 v2, v0, v2
661; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
662; GFX8-NEXT:    v_mov_b32_e32 v1, 16
663; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
664; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
665; GFX8-NEXT:    s_setpc_b64 s[30:31]
666;
667; GFX9-LABEL: v_fdiv_v2f16_afn:
668; GFX9:       ; %bb.0:
669; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670; GFX9-NEXT:    v_rcp_f16_e32 v2, v1
671; GFX9-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
672; GFX9-NEXT:    v_mul_f16_e32 v2, v0, v2
673; GFX9-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
674; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff
675; GFX9-NEXT:    v_and_or_b32 v0, v2, v1, v0
676; GFX9-NEXT:    s_setpc_b64 s[30:31]
677;
678; GFX10-LABEL: v_fdiv_v2f16_afn:
679; GFX10:       ; %bb.0:
680; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
681; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
682; GFX10-NEXT:    v_rcp_f16_e32 v2, v1
683; GFX10-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
684; GFX10-NEXT:    v_mul_f16_e32 v2, v0, v2
685; GFX10-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
686; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v2, v0
687; GFX10-NEXT:    s_setpc_b64 s[30:31]
688  %fdiv = fdiv afn <2 x half> %a, %b
689  ret <2 x half> %fdiv
690}
691
; Divides two <2 x half> vectors with no fast-math flags; the fdiv carries
; !fpmath !0. The !0 node is defined outside this view -- presumably the
; 2.5 ulp bound the function name refers to; confirm against its definition.
; Expected code recorded below, per target:
;  - tahiti (both denormal modes): every lane is converted to f32 and divided
;    with v_div_scale / v_rcp / v_fma chain / v_div_fmas / v_div_fixup, then
;    converted back to f16. The preserve-sign run additionally emits
;    s_setreg_imm32_b32 writes to HW_REG_MODE around each v_fma chain.
;  - fiji, gfx900, gfx1010: every lane uses v_rcp_f32 + v_mul_f32, is
;    converted back to f16 and passed through v_div_fixup_f16 before the two
;    halves are repacked into v0.
692define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) {
693; GFX6-IEEE-LABEL: v_fdiv_v2f16_ulp25:
694; GFX6-IEEE:       ; %bb.0:
695; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
697; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v2, v2
698; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
699; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v3, v3
700; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
701; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v5, v4
702; GFX6-IEEE-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
703; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
704; GFX6-IEEE-NEXT:    v_fma_f32 v5, v7, v5, v5
705; GFX6-IEEE-NEXT:    v_mul_f32_e32 v7, v6, v5
706; GFX6-IEEE-NEXT:    v_fma_f32 v8, -v4, v7, v6
707; GFX6-IEEE-NEXT:    v_fma_f32 v7, v8, v5, v7
708; GFX6-IEEE-NEXT:    v_fma_f32 v4, -v4, v7, v6
709; GFX6-IEEE-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
710; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
711; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
712; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
713; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
714; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
715; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
716; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
717; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
718; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v2, v6, v5
719; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
720; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v6, v5
721; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
722; GFX6-IEEE-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
723; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v1, v1
724; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
725;
726; GFX6-FLUSH-LABEL: v_fdiv_v2f16_ulp25:
727; GFX6-FLUSH:       ; %bb.0:
728; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
729; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
730; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v2, v2
731; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
732; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v5, v4
733; GFX6-FLUSH-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
734; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
735; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
736; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v7, v5, v5
737; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v7, v6, v5
738; GFX6-FLUSH-NEXT:    v_fma_f32 v8, -v4, v7, v6
739; GFX6-FLUSH-NEXT:    v_fma_f32 v7, v8, v5, v7
740; GFX6-FLUSH-NEXT:    v_fma_f32 v4, -v4, v7, v6
741; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
742; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
743; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v3, v3
744; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
745; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
746; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
747; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
748; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
749; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v4, v2
750; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
751; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
752; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
753; GFX6-FLUSH-NEXT:    v_fma_f32 v4, v6, v4, v4
754; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v4
755; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v2, v6, v5
756; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v4, v6
757; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v6, v5
758; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
759; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
760; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
761; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v1, v1
762; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
763;
764; GFX8-LABEL: v_fdiv_v2f16_ulp25:
765; GFX8:       ; %bb.0:
766; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
767; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
768; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v1
769; GFX8-NEXT:    v_cvt_f32_f16_e32 v5, v4
770; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
771; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v0
772; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
773; GFX8-NEXT:    v_cvt_f32_f16_e32 v7, v6
774; GFX8-NEXT:    v_rcp_f32_e32 v5, v5
775; GFX8-NEXT:    v_mul_f32_e32 v2, v3, v2
776; GFX8-NEXT:    v_cvt_f16_f32_e32 v2, v2
777; GFX8-NEXT:    v_mul_f32_e32 v3, v7, v5
778; GFX8-NEXT:    v_cvt_f16_f32_e32 v3, v3
779; GFX8-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
780; GFX8-NEXT:    v_mov_b32_e32 v2, 16
781; GFX8-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
782; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
783; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
784; GFX8-NEXT:    s_setpc_b64 s[30:31]
785;
786; GFX9-LABEL: v_fdiv_v2f16_ulp25:
787; GFX9:       ; %bb.0:
788; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
789; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
790; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v1
791; GFX9-NEXT:    v_cvt_f32_f16_e32 v5, v4
792; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
793; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v0
794; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
795; GFX9-NEXT:    v_cvt_f32_f16_e32 v7, v6
796; GFX9-NEXT:    v_rcp_f32_e32 v5, v5
797; GFX9-NEXT:    v_mul_f32_e32 v2, v3, v2
798; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
799; GFX9-NEXT:    v_mul_f32_e32 v3, v7, v5
800; GFX9-NEXT:    v_cvt_f16_f32_e32 v3, v3
801; GFX9-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
802; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
803; GFX9-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
804; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
805; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
806; GFX9-NEXT:    s_setpc_b64 s[30:31]
807;
808; GFX10-LABEL: v_fdiv_v2f16_ulp25:
809; GFX10:       ; %bb.0:
810; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
811; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
812; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
813; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v1
814; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
815; GFX10-NEXT:    v_cvt_f32_f16_e32 v7, v0
816; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v2
817; GFX10-NEXT:    v_rcp_f32_e32 v4, v4
818; GFX10-NEXT:    v_cvt_f32_f16_e32 v6, v5
819; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
820; GFX10-NEXT:    v_mul_f32_e32 v4, v7, v4
821; GFX10-NEXT:    v_mul_f32_e32 v3, v6, v3
822; GFX10-NEXT:    v_cvt_f16_f32_e32 v4, v4
823; GFX10-NEXT:    v_cvt_f16_f32_e32 v3, v3
824; GFX10-NEXT:    v_div_fixup_f16 v0, v4, v1, v0
825; GFX10-NEXT:    v_div_fixup_f16 v2, v3, v2, v5
826; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
827; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
828; GFX10-NEXT:    s_setpc_b64 s[30:31]
829  %fdiv = fdiv <2 x half> %a, %b, !fpmath !0
830  ret <2 x half> %fdiv
831}
832
; Reciprocal written as a plain fdiv of the splat constant <1.0, 1.0> by %x,
; with no fast-math flags and no !fpmath metadata.
; Expected code recorded below, per target:
;  - tahiti: full f32 division expansion per lane (v_div_scale / v_rcp /
;    v_fma chain / v_div_fmas / v_div_fixup). The ieee run converts the inline
;    1.0 operand; the preserve-sign run converts the constant from s6
;    (0x3c00) and adds s_setreg_imm32_b32 writes to HW_REG_MODE around each
;    v_fma chain.
;  - fiji, gfx900, gfx1010: v_rcp_f32 + v_mul_f32 per lane, converted back to
;    f16 and passed through v_div_fixup_f16 with a 1.0 numerator, then the
;    halves are repacked into v0.
833define <2 x half> @v_rcp_v2f16(<2 x half> %x) {
834; GFX6-IEEE-LABEL: v_rcp_v2f16:
835; GFX6-IEEE:       ; %bb.0:
836; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
837; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
838; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
839; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
840; GFX6-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v0, v0, v2
841; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
842; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v2, v0, v2
843; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
844; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
845; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
846; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v3, v6, v5
847; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
848; GFX6-IEEE-NEXT:    v_fma_f32 v3, -v3, v6, v5
849; GFX6-IEEE-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
850; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v3, v0, v2
851; GFX6-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
852; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
853; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v2, v1, v2
854; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
855; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
856; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
857; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
858; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v3, v6, v5
859; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
860; GFX6-IEEE-NEXT:    v_fma_f32 v3, -v3, v6, v5
861; GFX6-IEEE-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
862; GFX6-IEEE-NEXT:    v_div_fixup_f32 v1, v3, v1, v2
863; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v1, v1
864; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
865;
866; GFX6-FLUSH-LABEL: v_rcp_v2f16:
867; GFX6-FLUSH:       ; %bb.0:
868; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
869; GFX6-FLUSH-NEXT:    s_movk_i32 s6, 0x3c00
870; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v2, s6
871; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
872; GFX6-FLUSH-NEXT:    v_div_scale_f32 v3, s[4:5], v0, v0, v2
873; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v4, v3
874; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v2, v0, v2
875; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
876; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
877; GFX6-FLUSH-NEXT:    v_fma_f32 v4, v6, v4, v4
878; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v4
879; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v3, v6, v5
880; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v4, v6
881; GFX6-FLUSH-NEXT:    v_fma_f32 v3, -v3, v6, v5
882; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
883; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
884; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v4, s6
885; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
886; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v3, v0, v2
887; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
888; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
889; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v4
890; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
891; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v4, v1, v4
892; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
893; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v3, 1.0
894; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v6, v3, v3
895; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v3
896; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v2, v6, v5
897; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v3, v6
898; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v6, v5
899; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
900; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v6
901; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v1, v2, v1, v4
902; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v1, v1
903; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
904;
905; GFX8-LABEL: v_rcp_v2f16:
906; GFX8:       ; %bb.0:
907; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
908; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
909; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v0
910; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v2
911; GFX8-NEXT:    v_cvt_f32_f16_e32 v4, 1.0
912; GFX8-NEXT:    v_rcp_f32_e32 v1, v1
913; GFX8-NEXT:    v_rcp_f32_e32 v3, v3
914; GFX8-NEXT:    v_mul_f32_e32 v1, v4, v1
915; GFX8-NEXT:    v_mul_f32_e32 v3, v4, v3
916; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v1
917; GFX8-NEXT:    v_cvt_f16_f32_e32 v3, v3
918; GFX8-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
919; GFX8-NEXT:    v_div_fixup_f16 v1, v3, v2, 1.0
920; GFX8-NEXT:    v_mov_b32_e32 v2, 16
921; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
922; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
923; GFX8-NEXT:    s_setpc_b64 s[30:31]
924;
925; GFX9-LABEL: v_rcp_v2f16:
926; GFX9:       ; %bb.0:
927; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
928; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
929; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v0
930; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v2
931; GFX9-NEXT:    v_cvt_f32_f16_e32 v4, 1.0
932; GFX9-NEXT:    v_rcp_f32_e32 v1, v1
933; GFX9-NEXT:    v_rcp_f32_e32 v3, v3
934; GFX9-NEXT:    v_mul_f32_e32 v1, v4, v1
935; GFX9-NEXT:    v_mul_f32_e32 v3, v4, v3
936; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
937; GFX9-NEXT:    v_cvt_f16_f32_e32 v3, v3
938; GFX9-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
939; GFX9-NEXT:    v_div_fixup_f16 v1, v3, v2, 1.0
940; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
941; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
942; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
943; GFX9-NEXT:    s_setpc_b64 s[30:31]
944;
945; GFX10-LABEL: v_rcp_v2f16:
946; GFX10:       ; %bb.0:
947; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
948; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
949; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
950; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v0
951; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, 1.0
952; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
953; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
954; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
955; GFX10-NEXT:    v_mul_f32_e32 v3, v4, v3
956; GFX10-NEXT:    v_mul_f32_e32 v2, v4, v2
957; GFX10-NEXT:    v_cvt_f16_f32_e32 v3, v3
958; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
959; GFX10-NEXT:    v_div_fixup_f16 v0, v3, v0, 1.0
960; GFX10-NEXT:    v_div_fixup_f16 v1, v2, v1, 1.0
961; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
962; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
963; GFX10-NEXT:    s_setpc_b64 s[30:31]
964  %fdiv = fdiv <2 x half> <half 1.0, half 1.0>, %x
965  ret <2 x half> %fdiv
966}
967
; Reciprocal of %x written as fdiv of the splat constant <1.0, 1.0>, with only
; the `arcp` fast-math flag on the instruction (no `afn`, no !fpmath).
; The recorded expectations show that `arcp` alone does not select the bare
; v_rcp_f16 form:
;  - tahiti: full f32 division expansion per lane; the preserve-sign run also
;    emits s_setreg_imm32_b32 writes to HW_REG_MODE around each v_fma chain.
;  - fiji, gfx900, gfx1010: v_rcp_f32 + v_mul_f32 per lane followed by
;    v_div_fixup_f16 with a 1.0 numerator, then the halves are repacked.
968define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) {
969; GFX6-IEEE-LABEL: v_rcp_v2f16_arcp:
970; GFX6-IEEE:       ; %bb.0:
971; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
972; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
973; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
974; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
975; GFX6-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v0, v0, v2
976; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
977; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v2, v0, v2
978; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
979; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
980; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
981; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v3, v6, v5
982; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
983; GFX6-IEEE-NEXT:    v_fma_f32 v3, -v3, v6, v5
984; GFX6-IEEE-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
985; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v3, v0, v2
986; GFX6-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
987; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
988; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v2, v1, v2
989; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
990; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
991; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
992; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
993; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v3, v6, v5
994; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
995; GFX6-IEEE-NEXT:    v_fma_f32 v3, -v3, v6, v5
996; GFX6-IEEE-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
997; GFX6-IEEE-NEXT:    v_div_fixup_f32 v1, v3, v1, v2
998; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v1, v1
999; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
1000;
1001; GFX6-FLUSH-LABEL: v_rcp_v2f16_arcp:
1002; GFX6-FLUSH:       ; %bb.0:
1003; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1004; GFX6-FLUSH-NEXT:    s_movk_i32 s6, 0x3c00
1005; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v2, s6
1006; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
1007; GFX6-FLUSH-NEXT:    v_div_scale_f32 v3, s[4:5], v0, v0, v2
1008; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v4, v3
1009; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v2, v0, v2
1010; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1011; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
1012; GFX6-FLUSH-NEXT:    v_fma_f32 v4, v6, v4, v4
1013; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v4
1014; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v3, v6, v5
1015; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v4, v6
1016; GFX6-FLUSH-NEXT:    v_fma_f32 v3, -v3, v6, v5
1017; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1018; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
1019; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v4, s6
1020; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
1021; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v3, v0, v2
1022; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
1023; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
1024; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v4
1025; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
1026; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v4, v1, v4
1027; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1028; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v3, 1.0
1029; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v6, v3, v3
1030; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v3
1031; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v2, v6, v5
1032; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v3, v6
1033; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v6, v5
1034; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1035; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v6
1036; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v1, v2, v1, v4
1037; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v1, v1
1038; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
1039;
1040; GFX8-LABEL: v_rcp_v2f16_arcp:
1041; GFX8:       ; %bb.0:
1042; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1043; GFX8-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
1044; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v0
1045; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v2
1046; GFX8-NEXT:    v_cvt_f32_f16_e32 v4, 1.0
1047; GFX8-NEXT:    v_rcp_f32_e32 v1, v1
1048; GFX8-NEXT:    v_rcp_f32_e32 v3, v3
1049; GFX8-NEXT:    v_mul_f32_e32 v1, v4, v1
1050; GFX8-NEXT:    v_mul_f32_e32 v3, v4, v3
1051; GFX8-NEXT:    v_cvt_f16_f32_e32 v1, v1
1052; GFX8-NEXT:    v_cvt_f16_f32_e32 v3, v3
1053; GFX8-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
1054; GFX8-NEXT:    v_div_fixup_f16 v1, v3, v2, 1.0
1055; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1056; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1057; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1058; GFX8-NEXT:    s_setpc_b64 s[30:31]
1059;
1060; GFX9-LABEL: v_rcp_v2f16_arcp:
1061; GFX9:       ; %bb.0:
1062; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1063; GFX9-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
1064; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v0
1065; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v2
1066; GFX9-NEXT:    v_cvt_f32_f16_e32 v4, 1.0
1067; GFX9-NEXT:    v_rcp_f32_e32 v1, v1
1068; GFX9-NEXT:    v_rcp_f32_e32 v3, v3
1069; GFX9-NEXT:    v_mul_f32_e32 v1, v4, v1
1070; GFX9-NEXT:    v_mul_f32_e32 v3, v4, v3
1071; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
1072; GFX9-NEXT:    v_cvt_f16_f32_e32 v3, v3
1073; GFX9-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
1074; GFX9-NEXT:    v_div_fixup_f16 v1, v3, v2, 1.0
1075; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
1076; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1077; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
1078; GFX9-NEXT:    s_setpc_b64 s[30:31]
1079;
1080; GFX10-LABEL: v_rcp_v2f16_arcp:
1081; GFX10:       ; %bb.0:
1082; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1083; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1084; GFX10-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
1085; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v0
1086; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, 1.0
1087; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
1088; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
1089; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
1090; GFX10-NEXT:    v_mul_f32_e32 v3, v4, v3
1091; GFX10-NEXT:    v_mul_f32_e32 v2, v4, v2
1092; GFX10-NEXT:    v_cvt_f16_f32_e32 v3, v3
1093; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
1094; GFX10-NEXT:    v_div_fixup_f16 v0, v3, v0, 1.0
1095; GFX10-NEXT:    v_div_fixup_f16 v1, v2, v1, 1.0
1096; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1097; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
1098; GFX10-NEXT:    s_setpc_b64 s[30:31]
1099  %fdiv = fdiv arcp <2 x half> <half 1.0, half 1.0>, %x
1100  ret <2 x half> %fdiv
1101}
1102
; Reciprocal of %x written as fdiv of the splat constant <1.0, 1.0>, with both
; `arcp` and `afn` fast-math flags on the instruction.
; Expected code recorded below, per target:
;  - tahiti: both denormal-mode runs share one set of checks; each lane is
;    converted to f32, fed to v_rcp_f32, multiplied by the converted 1.0 and
;    converted back to f16. No v_div_scale / v_div_fixup sequence appears.
;  - fiji, gfx900, gfx1010: one v_rcp_f16 per half (the high half through the
;    SDWA form), then the two results are repacked into v0.
1103define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) {
1104; GFX6-LABEL: v_rcp_v2f16_arcp_afn:
1105; GFX6:       ; %bb.0:
1106; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1107; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
1108; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
1109; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
1110; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
1111; GFX6-NEXT:    v_rcp_f32_e32 v1, v1
1112; GFX6-NEXT:    v_mul_f32_e32 v0, v2, v0
1113; GFX6-NEXT:    v_mul_f32_e32 v1, v2, v1
1114; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
1115; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
1116; GFX6-NEXT:    s_setpc_b64 s[30:31]
1117;
1118; GFX8-LABEL: v_rcp_v2f16_arcp_afn:
1119; GFX8:       ; %bb.0:
1120; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1121; GFX8-NEXT:    v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1122; GFX8-NEXT:    v_rcp_f16_e32 v0, v0
1123; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1124; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1125; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1126; GFX8-NEXT:    s_setpc_b64 s[30:31]
1127;
1128; GFX9-LABEL: v_rcp_v2f16_arcp_afn:
1129; GFX9:       ; %bb.0:
1130; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1131; GFX9-NEXT:    v_rcp_f16_e32 v1, v0
1132; GFX9-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
1133; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
1134; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
1135; GFX9-NEXT:    s_setpc_b64 s[30:31]
1136;
1137; GFX10-LABEL: v_rcp_v2f16_arcp_afn:
1138; GFX10:       ; %bb.0:
1139; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1140; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1141; GFX10-NEXT:    v_rcp_f16_e32 v1, v0
1142; GFX10-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
1143; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v1, v0
1144; GFX10-NEXT:    s_setpc_b64 s[30:31]
1145  %fdiv = fdiv arcp afn <2 x half> <half 1.0, half 1.0>, %x
1146  ret <2 x half> %fdiv
1147}
1148
; Reciprocal of %x written as fdiv of the splat constant <1.0, 1.0>, with no
; fast-math flags but with !fpmath !0 attached. The !0 node is defined outside
; this view -- presumably the 2.5 ulp bound the function name refers to;
; confirm against its definition.
; Expected code recorded below, per target:
;  - tahiti: still the full f32 division expansion per lane; the preserve-sign
;    run also emits s_setreg_imm32_b32 writes to HW_REG_MODE around each
;    v_fma chain.
;  - fiji, gfx900, gfx1010: one v_rcp_f16 per half (the high half through the
;    SDWA form) with no v_div_fixup_f16, then the halves are repacked into v0.
1149define <2 x half> @v_rcp_v2f16_ulp25(<2 x half> %x) {
1150; GFX6-IEEE-LABEL: v_rcp_v2f16_ulp25:
1151; GFX6-IEEE:       ; %bb.0:
1152; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1153; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
1154; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
1155; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
1156; GFX6-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v0, v0, v2
1157; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
1158; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v2, v0, v2
1159; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
1160; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
1161; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
1162; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v3, v6, v5
1163; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
1164; GFX6-IEEE-NEXT:    v_fma_f32 v3, -v3, v6, v5
1165; GFX6-IEEE-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
1166; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v3, v0, v2
1167; GFX6-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
1168; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
1169; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v2, v1, v2
1170; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
1171; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
1172; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
1173; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
1174; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v3, v6, v5
1175; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
1176; GFX6-IEEE-NEXT:    v_fma_f32 v3, -v3, v6, v5
1177; GFX6-IEEE-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
1178; GFX6-IEEE-NEXT:    v_div_fixup_f32 v1, v3, v1, v2
1179; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v1, v1
1180; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
1181;
1182; GFX6-FLUSH-LABEL: v_rcp_v2f16_ulp25:
1183; GFX6-FLUSH:       ; %bb.0:
1184; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185; GFX6-FLUSH-NEXT:    s_movk_i32 s6, 0x3c00
1186; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v2, s6
1187; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
1188; GFX6-FLUSH-NEXT:    v_div_scale_f32 v3, s[4:5], v0, v0, v2
1189; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v4, v3
1190; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v2, v0, v2
1191; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1192; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
1193; GFX6-FLUSH-NEXT:    v_fma_f32 v4, v6, v4, v4
1194; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v4
1195; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v3, v6, v5
1196; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v4, v6
1197; GFX6-FLUSH-NEXT:    v_fma_f32 v3, -v3, v6, v5
1198; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1199; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
1200; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v4, s6
1201; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
1202; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v3, v0, v2
1203; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
1204; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
1205; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v4
1206; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
1207; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v4, v1, v4
1208; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1209; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v3, 1.0
1210; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v6, v3, v3
1211; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v3
1212; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v2, v6, v5
1213; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v3, v6
1214; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v6, v5
1215; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1216; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v6
1217; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v1, v2, v1, v4
1218; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v1, v1
1219; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
1220;
1221; GFX8-LABEL: v_rcp_v2f16_ulp25:
1222; GFX8:       ; %bb.0:
1223; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1224; GFX8-NEXT:    v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1225; GFX8-NEXT:    v_rcp_f16_e32 v0, v0
1226; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1227; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1228; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1229; GFX8-NEXT:    s_setpc_b64 s[30:31]
1230;
1231; GFX9-LABEL: v_rcp_v2f16_ulp25:
1232; GFX9:       ; %bb.0:
1233; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1234; GFX9-NEXT:    v_rcp_f16_e32 v1, v0
1235; GFX9-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
1236; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
1237; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
1238; GFX9-NEXT:    s_setpc_b64 s[30:31]
1239;
1240; GFX10-LABEL: v_rcp_v2f16_ulp25:
1241; GFX10:       ; %bb.0:
1242; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1243; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1244; GFX10-NEXT:    v_rcp_f16_e32 v1, v0
1245; GFX10-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
1246; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v1, v0
1247; GFX10-NEXT:    s_setpc_b64 s[30:31]
1248  %fdiv = fdiv <2 x half> <half 1.0, half 1.0>, %x, !fpmath !0
1249  ret <2 x half> %fdiv
1250}
1251
; Divides two <2 x half> vectors with the `afn` fast-math flag and !fpmath !0
; both present on the fdiv. The !0 node is defined outside this view --
; presumably the 2.5 ulp bound the function name refers to; confirm against
; its definition.
; Expected code recorded below, per target:
;  - tahiti: both denormal-mode runs share one set of checks; each lane is
;    converted to f32, the divisor goes through v_rcp_f32, the product is
;    formed with v_mul_f32 and converted back to f16.
;  - fiji, gfx900, gfx1010: v_rcp_f16 on each divisor half followed by
;    v_mul_f16 (SDWA forms for the high half), then the halves are repacked
;    into v0.
1252define <2 x half> @v_fdiv_v2f16_afn_ulp25(<2 x half> %a, <2 x half> %b) {
1253; GFX6-LABEL: v_fdiv_v2f16_afn_ulp25:
1254; GFX6:       ; %bb.0:
1255; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1256; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
1257; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
1258; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
1259; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
1260; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
1261; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
1262; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
1263; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v3
1264; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
1265; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
1266; GFX6-NEXT:    s_setpc_b64 s[30:31]
1267;
1268; GFX8-LABEL: v_fdiv_v2f16_afn_ulp25:
1269; GFX8:       ; %bb.0:
1270; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1271; GFX8-NEXT:    v_rcp_f16_e32 v2, v1
1272; GFX8-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1273; GFX8-NEXT:    v_mul_f16_e32 v2, v0, v2
1274; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1275; GFX8-NEXT:    v_mov_b32_e32 v1, 16
1276; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1277; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1278; GFX8-NEXT:    s_setpc_b64 s[30:31]
1279;
1280; GFX9-LABEL: v_fdiv_v2f16_afn_ulp25:
1281; GFX9:       ; %bb.0:
1282; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1283; GFX9-NEXT:    v_rcp_f16_e32 v2, v1
1284; GFX9-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1285; GFX9-NEXT:    v_mul_f16_e32 v2, v0, v2
1286; GFX9-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1287; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff
1288; GFX9-NEXT:    v_and_or_b32 v0, v2, v1, v0
1289; GFX9-NEXT:    s_setpc_b64 s[30:31]
1290;
1291; GFX10-LABEL: v_fdiv_v2f16_afn_ulp25:
1292; GFX10:       ; %bb.0:
1293; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1294; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1295; GFX10-NEXT:    v_rcp_f16_e32 v2, v1
1296; GFX10-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1297; GFX10-NEXT:    v_mul_f16_e32 v2, v0, v2
1298; GFX10-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1299; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v2, v0
1300; GFX10-NEXT:    s_setpc_b64 s[30:31]
1301  %fdiv = fdiv afn <2 x half> %a, %b, !fpmath !0
1302  ret <2 x half> %fdiv
1303}
1304
1305define <2 x half> @v_fdiv_v2f16_arcp_ulp25(<2 x half> %a, <2 x half> %b) {
1306; GFX6-IEEE-LABEL: v_fdiv_v2f16_arcp_ulp25:
1307; GFX6-IEEE:       ; %bb.0:
1308; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1309; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
1310; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v2, v2
1311; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
1312; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v3, v3
1313; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
1314; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v5, v4
1315; GFX6-IEEE-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
1316; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
1317; GFX6-IEEE-NEXT:    v_fma_f32 v5, v7, v5, v5
1318; GFX6-IEEE-NEXT:    v_mul_f32_e32 v7, v6, v5
1319; GFX6-IEEE-NEXT:    v_fma_f32 v8, -v4, v7, v6
1320; GFX6-IEEE-NEXT:    v_fma_f32 v7, v8, v5, v7
1321; GFX6-IEEE-NEXT:    v_fma_f32 v4, -v4, v7, v6
1322; GFX6-IEEE-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
1323; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
1324; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
1325; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
1326; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
1327; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
1328; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
1329; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
1330; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
1331; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v2, v6, v5
1332; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
1333; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v6, v5
1334; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
1335; GFX6-IEEE-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
1336; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v1, v1
1337; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
1338;
1339; GFX6-FLUSH-LABEL: v_fdiv_v2f16_arcp_ulp25:
1340; GFX6-FLUSH:       ; %bb.0:
1341; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1342; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
1343; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v2, v2
1344; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
1345; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v5, v4
1346; GFX6-FLUSH-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
1347; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1348; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
1349; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v7, v5, v5
1350; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v7, v6, v5
1351; GFX6-FLUSH-NEXT:    v_fma_f32 v8, -v4, v7, v6
1352; GFX6-FLUSH-NEXT:    v_fma_f32 v7, v8, v5, v7
1353; GFX6-FLUSH-NEXT:    v_fma_f32 v4, -v4, v7, v6
1354; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1355; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
1356; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v3, v3
1357; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
1358; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
1359; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
1360; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
1361; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
1362; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v4, v2
1363; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
1364; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
1365; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
1366; GFX6-FLUSH-NEXT:    v_fma_f32 v4, v6, v4, v4
1367; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v4
1368; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v2, v6, v5
1369; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v4, v6
1370; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v6, v5
1371; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
1372; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
1373; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
1374; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v1, v1
1375; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
1376;
1377; GFX8-LABEL: v_fdiv_v2f16_arcp_ulp25:
1378; GFX8:       ; %bb.0:
1379; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1380; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
1381; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v1
1382; GFX8-NEXT:    v_cvt_f32_f16_e32 v5, v4
1383; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1384; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v0
1385; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
1386; GFX8-NEXT:    v_cvt_f32_f16_e32 v7, v6
1387; GFX8-NEXT:    v_rcp_f32_e32 v5, v5
1388; GFX8-NEXT:    v_mul_f32_e32 v2, v3, v2
1389; GFX8-NEXT:    v_cvt_f16_f32_e32 v2, v2
1390; GFX8-NEXT:    v_mul_f32_e32 v3, v7, v5
1391; GFX8-NEXT:    v_cvt_f16_f32_e32 v3, v3
1392; GFX8-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
1393; GFX8-NEXT:    v_mov_b32_e32 v2, 16
1394; GFX8-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
1395; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1396; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1397; GFX8-NEXT:    s_setpc_b64 s[30:31]
1398;
1399; GFX9-LABEL: v_fdiv_v2f16_arcp_ulp25:
1400; GFX9:       ; %bb.0:
1401; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1402; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
1403; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v1
1404; GFX9-NEXT:    v_cvt_f32_f16_e32 v5, v4
1405; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
1406; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v0
1407; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
1408; GFX9-NEXT:    v_cvt_f32_f16_e32 v7, v6
1409; GFX9-NEXT:    v_rcp_f32_e32 v5, v5
1410; GFX9-NEXT:    v_mul_f32_e32 v2, v3, v2
1411; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
1412; GFX9-NEXT:    v_mul_f32_e32 v3, v7, v5
1413; GFX9-NEXT:    v_cvt_f16_f32_e32 v3, v3
1414; GFX9-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
1415; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
1416; GFX9-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
1417; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
1418; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
1419; GFX9-NEXT:    s_setpc_b64 s[30:31]
1420;
1421; GFX10-LABEL: v_fdiv_v2f16_arcp_ulp25:
1422; GFX10:       ; %bb.0:
1423; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1424; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1425; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
1426; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v1
1427; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
1428; GFX10-NEXT:    v_cvt_f32_f16_e32 v7, v0
1429; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v2
1430; GFX10-NEXT:    v_rcp_f32_e32 v4, v4
1431; GFX10-NEXT:    v_cvt_f32_f16_e32 v6, v5
1432; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
1433; GFX10-NEXT:    v_mul_f32_e32 v4, v7, v4
1434; GFX10-NEXT:    v_mul_f32_e32 v3, v6, v3
1435; GFX10-NEXT:    v_cvt_f16_f32_e32 v4, v4
1436; GFX10-NEXT:    v_cvt_f16_f32_e32 v3, v3
1437; GFX10-NEXT:    v_div_fixup_f16 v0, v4, v1, v0
1438; GFX10-NEXT:    v_div_fixup_f16 v2, v3, v2, v5
1439; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
1440; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
1441; GFX10-NEXT:    s_setpc_b64 s[30:31]
1442  %fdiv = fdiv arcp <2 x half> %a, %b, !fpmath !0
1443  ret <2 x half> %fdiv
1444}
1445
; Test: <2 x half> fdiv carrying both the 'afn' and 'arcp' fast-math flags plus
; !fpmath !0 metadata. The function divides %a by %b and returns the quotient.
;
; Expected lowering, as pinned by the checks below:
;   * GFX6: each half element is converted to f32, the divisor goes through
;     v_rcp_f32, the product is formed with v_mul_f32, and the result is
;     converted back to f16. The checks use the shared GFX6 prefix, so both the
;     -denormal-fp-math=ieee and =preserve-sign RUN lines must match them.
;   * GFX8/GFX9/GFX10: v_rcp_f16 followed by v_mul_f16, with SDWA forms
;     selecting the high 16-bit element, then the two halves are packed into v0.
;   * None of the expected sequences contains v_div_scale / v_div_fmas /
;     v_div_fixup.
;
; The check lines are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of editing by hand.
1446define <2 x half> @v_fdiv_v2f16_arcp_afn_ulp25(<2 x half> %a, <2 x half> %b) {
1447; GFX6-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
1448; GFX6:       ; %bb.0:
1449; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1450; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
1451; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
1452; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
1453; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
1454; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
1455; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
1456; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
1457; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v3
1458; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
1459; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
1460; GFX6-NEXT:    s_setpc_b64 s[30:31]
1461;
1462; GFX8-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
1463; GFX8:       ; %bb.0:
1464; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1465; GFX8-NEXT:    v_rcp_f16_e32 v2, v1
1466; GFX8-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1467; GFX8-NEXT:    v_mul_f16_e32 v2, v0, v2
1468; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1469; GFX8-NEXT:    v_mov_b32_e32 v1, 16
1470; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
1471; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
1472; GFX8-NEXT:    s_setpc_b64 s[30:31]
1473;
1474; GFX9-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
1475; GFX9:       ; %bb.0:
1476; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1477; GFX9-NEXT:    v_rcp_f16_e32 v2, v1
1478; GFX9-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1479; GFX9-NEXT:    v_mul_f16_e32 v2, v0, v2
1480; GFX9-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1481; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff
1482; GFX9-NEXT:    v_and_or_b32 v0, v2, v1, v0
1483; GFX9-NEXT:    s_setpc_b64 s[30:31]
1484;
1485; GFX10-LABEL: v_fdiv_v2f16_arcp_afn_ulp25:
1486; GFX10:       ; %bb.0:
1487; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1488; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
1489; GFX10-NEXT:    v_rcp_f16_e32 v2, v1
1490; GFX10-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
1491; GFX10-NEXT:    v_mul_f16_e32 v2, v0, v2
1492; GFX10-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
1493; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v2, v0
1494; GFX10-NEXT:    s_setpc_b64 s[30:31]
1495  %fdiv = fdiv afn arcp <2 x half> %a, %b, !fpmath !0
1496  ret <2 x half> %fdiv
1497}
1498
; Metadata node attached via '!fpmath !0' to the fdiv instructions in the
; *_ulp25 tests of this file; its single operand is the float constant 2.5
; (per the LLVM LangRef, the maximum acceptable error of the result in ULPs).
1499!0 = !{float 2.500000e+00}
1500