; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Denormal mode shouldn't matter for f16, check with and without flushing.
; The two tahiti invocations use separate IEEE and FLUSH check prefixes (plus a
; common one). For fiji, gfx900 and gfx1010 both denormal modes share the same
; prefixes, so the expected output has to be identical in either mode.
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6,GFX6-IEEE %s
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6,GFX6-FLUSH %s

; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX8 %s

; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX89,GFX9 %s

; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Scalar f16 fdiv with no fast-math flags. The tahiti checks convert both
; operands to f32 and expect the div_scale / fma / div_fmas / div_fixup
; sequence; the FLUSH variant additionally expects s_setreg_imm32_b32 writes to
; HW_REG_MODE around it. The other targets expect rcp_f32 + mul_f32 in f32
; followed by v_div_fixup_f16.
define half @v_fdiv_f16(half %a, half %b) {
; GFX6-IEEE-LABEL: v_fdiv_f16:
; GFX6-IEEE:       ; %bb.0:
; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-FLUSH-LABEL: v_fdiv_f16:
; GFX6-FLUSH:       ; %bb.0:
; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_fdiv_f16:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX89-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX89-NEXT:    v_rcp_f32_e32 v2, v2
; GFX89-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX89-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX89-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX10-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv half %a, %b
  ret half %fdiv
}

; Scalar f16 fdiv with the afn flag. Every target expects just a reciprocal and
; a multiply (in f32 on tahiti, in f16 elsewhere) with no div_fixup, and the
; tahiti output is shared between both denormal modes.
define half @v_fdiv_f16_afn(half %a, half %b) {
; GFX6-LABEL: v_fdiv_f16_afn:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_rcp_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_fdiv_f16_afn:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_rcp_f16_e32 v1, v1
; GFX89-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_f16_afn:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rcp_f16_e32 v1, v1
; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv afn half %a, %b
  ret half %fdiv
}

; Scalar f16 fdiv carrying !fpmath !0. The !0 node is defined outside the part
; of the file visible here (presumably 2.5 ULP, going by the function name --
; confirm against the metadata). The expected code is the same as for
; v_fdiv_f16.
define half @v_fdiv_f16_ulp25(half %a, half %b) {
; GFX6-IEEE-LABEL: v_fdiv_f16_ulp25:
; GFX6-IEEE:       ; %bb.0:
; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-FLUSH-LABEL: v_fdiv_f16_ulp25:
; GFX6-FLUSH:       ; %bb.0:
; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_fdiv_f16_ulp25:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX89-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX89-NEXT:    v_rcp_f32_e32 v2, v2
; GFX89-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX89-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX89-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_f16_ulp25:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX10-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv half %a, %b, !fpmath !0
  ret half %fdiv
}

; Reciprocal written as 1.0 / x with no flags. The expansions have the same
; shape as the general fdiv case, with the constant 1.0 as the numerator.
define half @v_rcp_f16(half %x) {
; GFX6-IEEE-LABEL: v_rcp_f16:
; GFX6-IEEE:       ; %bb.0:
; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-FLUSH-LABEL: v_rcp_f16:
; GFX6-FLUSH:       ; %bb.0:
; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_rcp_f16:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
; GFX89-NEXT:    v_rcp_f32_e32 v1, v1
; GFX89-NEXT:    v_mul_f32_e32 v1, v2, v1
; GFX89-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX89-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_rcp_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
; GFX10-NEXT:    v_mul_f32_e32 v1, v2, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv half 1.0, %x
  ret half %fdiv
}

; 1.0 / x with only the arcp flag. The expected output is identical to
; v_rcp_f16, i.e. arcp alone does not change the expansion on any target here.
define half @v_rcp_f16_arcp(half %x) {
; GFX6-IEEE-LABEL: v_rcp_f16_arcp:
; GFX6-IEEE:       ; %bb.0:
; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-FLUSH-LABEL: v_rcp_f16_arcp:
; GFX6-FLUSH:       ; %bb.0:
; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_rcp_f16_arcp:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
; GFX89-NEXT:    v_rcp_f32_e32 v1, v1
; GFX89-NEXT:    v_mul_f32_e32 v1, v2, v1
; GFX89-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX89-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_rcp_f16_arcp:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
; GFX10-NEXT:    v_mul_f32_e32 v1, v2, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_div_fixup_f16 v0, v1, v0, 1.0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv arcp half 1.0, %x
  ret half %fdiv
}

; 1.0 / x with both arcp and afn. fiji, gfx900 and gfx1010 expect a single
; v_rcp_f16; tahiti expects an f32 reciprocal multiplied by the converted 1.0.
define half @v_rcp_f16_arcp_afn(half %x) {
; GFX6-LABEL: v_rcp_f16_arcp_afn:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_rcp_f16_arcp_afn:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_rcp_f16_e32 v0, v0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_rcp_f16_arcp_afn:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rcp_f16_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv arcp afn half 1.0, %x
  ret half %fdiv
}

; 1.0 / x carrying !fpmath !0 (node defined outside this view). fiji, gfx900
; and gfx1010 expect a single v_rcp_f16, while tahiti still expects the full
; f32 division expansion.
define half @v_rcp_f16_ulp25(half %x) {
; GFX6-IEEE-LABEL: v_rcp_f16_ulp25:
; GFX6-IEEE:       ; %bb.0:
; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-FLUSH-LABEL: v_rcp_f16_ulp25:
; GFX6-FLUSH:       ; %bb.0:
; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, 1.0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v0, v0, v1
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v1, v0, v1
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v0, v1
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_rcp_f16_ulp25:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_rcp_f16_e32 v0, v0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_rcp_f16_ulp25:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rcp_f16_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv half 1.0, %x, !fpmath !0
  ret half %fdiv
}

; afn combined with !fpmath !0. The expected output is the same reciprocal and
; multiply sequence as for v_fdiv_f16_afn.
define half @v_fdiv_f16_afn_ulp25(half %a, half %b) {
; GFX6-LABEL: v_fdiv_f16_afn_ulp25:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_rcp_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_fdiv_f16_afn_ulp25:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_rcp_f16_e32 v1, v1
; GFX89-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_f16_afn_ulp25:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rcp_f16_e32 v1, v1
; GFX10-NEXT:    v_mul_f16_e32 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv afn half %a, %b, !fpmath !0
  ret half %fdiv
}

; arcp combined with !fpmath !0 on a general a / b. The expected output is the
; same as for the unflagged v_fdiv_f16 on every target.
define half @v_fdiv_f16_arcp_ulp25(half %a, half %b) {
; GFX6-IEEE-LABEL: v_fdiv_f16_arcp_ulp25:
; GFX6-IEEE:       ; %bb.0:
; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-IEEE-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-FLUSH-LABEL: v_fdiv_f16_arcp_ulp25:
; GFX6-FLUSH:       ; %bb.0:
; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v1, v1, v0
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v3, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, vcc, v0, v1, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v3, v5, v3, v3
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v5, v4, v3
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v5, v4
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v6, v3, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v5, v4
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v2, v1, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX89-LABEL: v_fdiv_f16_arcp_ulp25:
; GFX89:       ; %bb.0:
; GFX89-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX89-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX89-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX89-NEXT:    v_rcp_f32_e32 v2, v2
; GFX89-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX89-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX89-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX89-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_f16_arcp_ulp25:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX10-NEXT:    v_rcp_f32_e32 v2, v2
; GFX10-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX10-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv arcp half %a, %b, !fpmath !0
  ret half %fdiv
}

; <2 x half> fdiv with no flags. Each element is divided separately using the
; scalar expansion. On tahiti the checks operate on v0..v3 and return results
; in v0 and v1; on the other targets the high halves are extracted with a
; 16-bit shift and the two f16 results are recombined into v0. From here on
; fiji and gfx900 have separate check prefixes because that recombination
; differs.
define <2 x half> @v_fdiv_v2f16(<2 x half> %a, <2 x half> %b) {
; GFX6-IEEE-LABEL: v_fdiv_v2f16:
; GFX6-IEEE:       ; %bb.0:
; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v5, v4
; GFX6-IEEE-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v5, v7, v5, v5
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v7, v6, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v8, -v4, v7, v6
; GFX6-IEEE-NEXT:    v_fma_f32 v7, v8, v5, v7
; GFX6-IEEE-NEXT:    v_fma_f32 v4, -v4, v7, v6
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v2, v6, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v6, v5
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-FLUSH-LABEL: v_fdiv_v2f16:
; GFX6-FLUSH:       ; %bb.0:
; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v5, v4
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v7, v5, v5
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v7, v6, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v8, -v4, v7, v6
; GFX6-FLUSH-NEXT:    v_fma_f32 v7, v8, v5, v7
; GFX6-FLUSH-NEXT:    v_fma_f32 v4, -v4, v7, v6
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v4, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v2, v6, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v6, v5
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fdiv_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v5, v4
; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f32_f16_e32 v7, v6
; GFX8-NEXT:    v_rcp_f32_e32 v5, v5
; GFX8-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX8-NEXT:    v_mul_f32_e32 v3, v7, v5
; GFX8-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX8-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v5, v4
; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
; GFX9-NEXT:    v_cvt_f32_f16_e32 v7, v6
; GFX9-NEXT:    v_rcp_f32_e32 v5, v5
; GFX9-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_f32_e32 v3, v7, v5
; GFX9-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX9-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v7, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v2
; GFX10-NEXT:    v_rcp_f32_e32 v4, v4
; GFX10-NEXT:    v_cvt_f32_f16_e32 v6, v5
; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
; GFX10-NEXT:    v_mul_f32_e32 v4, v7, v4
; GFX10-NEXT:    v_mul_f32_e32 v3, v6, v3
; GFX10-NEXT:    v_cvt_f16_f32_e32 v4, v4
; GFX10-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX10-NEXT:    v_div_fixup_f16 v0, v4, v1, v0
; GFX10-NEXT:    v_div_fixup_f16 v2, v3, v2, v5
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv <2 x half> %a, %b
  ret <2 x half> %fdiv
}

; <2 x half> fdiv with afn. Each element becomes a reciprocal and a multiply.
; fiji, gfx900 and gfx1010 use SDWA forms to reach the high halves and then
; recombine the two results into v0.
define <2 x half> @v_fdiv_v2f16_afn(<2 x half> %a, <2 x half> %b) {
; GFX6-LABEL: v_fdiv_v2f16_afn:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_rcp_f32_e32 v2, v2
; GFX6-NEXT:    v_rcp_f32_e32 v3, v3
; GFX6-NEXT:    v_mul_f32_e32 v0, v0, v2
; GFX6-NEXT:    v_mul_f32_e32 v1, v1, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fdiv_v2f16_afn:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rcp_f16_e32 v2, v1
; GFX8-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mul_f16_e32 v2, v0, v2
; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_mov_b32_e32 v1, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f16_afn:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rcp_f16_e32 v2, v1
; GFX9-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mul_f16_e32 v2, v0, v2
; GFX9-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v2, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_v2f16_afn:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rcp_f16_e32 v2, v1
; GFX10-NEXT:    v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_mul_f16_e32 v2, v0, v2
; GFX10-NEXT:    v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v2, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv afn <2 x half> %a, %b
  ret <2 x half> %fdiv
}

; <2 x half> fdiv carrying !fpmath !0 (node defined outside this view). The
; expected output is the same as for the unflagged v_fdiv_v2f16.
define <2 x half> @v_fdiv_v2f16_ulp25(<2 x half> %a, <2 x half> %b) {
; GFX6-IEEE-LABEL: v_fdiv_v2f16_ulp25:
; GFX6-IEEE:       ; %bb.0:
; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-IEEE-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v5, v4
; GFX6-IEEE-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v5, v7, v5, v5
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v7, v6, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v8, -v4, v7, v6
; GFX6-IEEE-NEXT:    v_fma_f32 v7, v8, v5, v7
; GFX6-IEEE-NEXT:    v_fma_f32 v4, -v4, v7, v6
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
; GFX6-IEEE-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v2
; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v2, v6, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-IEEE-NEXT:    v_fma_f32 v2, -v2, v6, v5
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-FLUSH-LABEL: v_fdiv_v2f16_ulp25:
; GFX6-FLUSH:       ; %bb.0:
; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v4, s[4:5], v2, v2, v0
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v5, v4
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v6, vcc, v0, v2, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v4, v5, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v5, v7, v5, v5
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v7, v6, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v8, -v4, v7, v6
; GFX6-FLUSH-NEXT:    v_fma_f32 v7, v8, v5, v7
; GFX6-FLUSH-NEXT:    v_fma_f32 v4, -v4, v7, v6
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-FLUSH-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v4, v4, v5, v7
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v0, v4, v2, v0
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v2, s[4:5], v3, v3, v1
; GFX6-FLUSH-NEXT:    v_rcp_f32_e32 v4, v2
; GFX6-FLUSH-NEXT:    v_div_scale_f32 v5, vcc, v1, v3, v1
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, -v2, v4, 1.0
; GFX6-FLUSH-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-FLUSH-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-FLUSH-NEXT:    v_fma_f32 v7, -v2, v6, v5
; GFX6-FLUSH-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-FLUSH-NEXT:    v_fma_f32 v2, -v2, v6, v5
; GFX6-FLUSH-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
; GFX6-FLUSH-NEXT:    v_div_fmas_f32 v2, v2, v4, v6
; GFX6-FLUSH-NEXT:    v_div_fixup_f32 v1, v2, v3, v1
; GFX6-FLUSH-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-FLUSH-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fdiv_v2f16_ulp25:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v5, v4
; GFX8-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
; GFX8-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX8-NEXT:    v_rcp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f32_f16_e32 v7, v6
; GFX8-NEXT:    v_rcp_f32_e32 v5, v5
; GFX8-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX8-NEXT:    v_mul_f32_e32 v3, v7, v5
; GFX8-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX8-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f16_ulp25:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_lshrrev_b32_e32 v4, 16, v1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v2, v1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v5, v4
; GFX9-NEXT:    v_lshrrev_b32_e32 v6, 16, v0
; GFX9-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
; GFX9-NEXT:    v_cvt_f32_f16_e32 v7, v6
; GFX9-NEXT:    v_rcp_f32_e32 v5, v5
; GFX9-NEXT:    v_mul_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_f32_e32 v3, v7, v5
; GFX9-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX9-NEXT:    v_div_fixup_f16 v0, v2, v1, v0
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_div_fixup_f16 v1, v3, v4, v6
; GFX9-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX9-NEXT:    v_and_or_b32 v0, v0, v2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_v2f16_ulp25:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_lshrrev_b32_e32 v2, 16, v1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v4, v1
; GFX10-NEXT:    v_lshrrev_b32_e32 v5, 16, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v7, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v3, v2
; GFX10-NEXT:    v_rcp_f32_e32 v4, v4
; GFX10-NEXT:    v_cvt_f32_f16_e32 v6, v5
; GFX10-NEXT:    v_rcp_f32_e32 v3, v3
; GFX10-NEXT:    v_mul_f32_e32 v4, v7, v4
; GFX10-NEXT:    v_mul_f32_e32 v3, v6, v3
; GFX10-NEXT:    v_cvt_f16_f32_e32 v4, v4
; GFX10-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX10-NEXT:    v_div_fixup_f16 v0, v4, v1, v0
; GFX10-NEXT:    v_div_fixup_f16 v2, v3, v2, v5
; GFX10-NEXT:    v_lshlrev_b32_e32 v1, 16, v2
; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fdiv = fdiv <2 x half> %a, %b, !fpmath !0
  ret <2 x half> %fdiv
}

define <2 x half> @v_rcp_v2f16(<2 x half> %x) {
; GFX6-IEEE-LABEL: v_rcp_v2f16:
; GFX6-IEEE:       ; %bb.0:
; GFX6-IEEE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v2, 1.0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-IEEE-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v0, v0, v2
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v2, v0, v2
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-IEEE-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v0, v3, v0, v2
; GFX6-IEEE-NEXT:    v_div_scale_f32 v3, s[4:5], v1, v1, v2
; GFX6-IEEE-NEXT:    v_rcp_f32_e32 v4, v3
; GFX6-IEEE-NEXT:    v_div_scale_f32 v5, vcc, v2, v1, v2
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-IEEE-NEXT:    v_fma_f32 v6, -v3, v4, 1.0
; GFX6-IEEE-NEXT:    v_fma_f32 v4, v6, v4, v4
; GFX6-IEEE-NEXT:    v_mul_f32_e32 v6, v5, v4
; GFX6-IEEE-NEXT:    v_fma_f32 v7, -v3, v6, v5
; GFX6-IEEE-NEXT:    v_fma_f32 v6, v7, v4, v6
; GFX6-IEEE-NEXT:    v_fma_f32 v3, -v3, v6, v5
; GFX6-IEEE-NEXT:    v_div_fmas_f32 v3, v3, v4, v6
; GFX6-IEEE-NEXT:    v_div_fixup_f32 v1, v3, v1, v2
; GFX6-IEEE-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-IEEE-NEXT:    s_setpc_b64 s[30:31]
;
; GFX6-FLUSH-LABEL: v_rcp_v2f16:
; GFX6-FLUSH:       ; %bb.0:
; GFX6-FLUSH-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-FLUSH-NEXT:    s_movk_i32 s6, 0x3c00
;
GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, s6 871; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 872; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 873; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 874; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2 875; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 876; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v3, v4, 1.0 877; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4 878; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4 879; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v3, v6, v5 880; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6 881; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5 882; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 883; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 884; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s6 885; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 886; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2 887; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 888; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 889; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4 890; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 891; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v1, v4 892; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 893; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 894; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 895; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3 896; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5 897; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 898; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5 899; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 900; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 901; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4 902; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 903; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] 904; 905; GFX8-LABEL: v_rcp_v2f16: 906; GFX8: ; %bb.0: 907; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 908; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 909; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v0 910; GFX8-NEXT: 
v_cvt_f32_f16_e32 v3, v2 911; GFX8-NEXT: v_cvt_f32_f16_e32 v4, 1.0 912; GFX8-NEXT: v_rcp_f32_e32 v1, v1 913; GFX8-NEXT: v_rcp_f32_e32 v3, v3 914; GFX8-NEXT: v_mul_f32_e32 v1, v4, v1 915; GFX8-NEXT: v_mul_f32_e32 v3, v4, v3 916; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v1 917; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v3 918; GFX8-NEXT: v_div_fixup_f16 v0, v1, v0, 1.0 919; GFX8-NEXT: v_div_fixup_f16 v1, v3, v2, 1.0 920; GFX8-NEXT: v_mov_b32_e32 v2, 16 921; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 922; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 923; GFX8-NEXT: s_setpc_b64 s[30:31] 924; 925; GFX9-LABEL: v_rcp_v2f16: 926; GFX9: ; %bb.0: 927; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 928; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 929; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v0 930; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v2 931; GFX9-NEXT: v_cvt_f32_f16_e32 v4, 1.0 932; GFX9-NEXT: v_rcp_f32_e32 v1, v1 933; GFX9-NEXT: v_rcp_f32_e32 v3, v3 934; GFX9-NEXT: v_mul_f32_e32 v1, v4, v1 935; GFX9-NEXT: v_mul_f32_e32 v3, v4, v3 936; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 937; GFX9-NEXT: v_cvt_f16_f32_e32 v3, v3 938; GFX9-NEXT: v_div_fixup_f16 v0, v1, v0, 1.0 939; GFX9-NEXT: v_div_fixup_f16 v1, v3, v2, 1.0 940; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff 941; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 942; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1 943; GFX9-NEXT: s_setpc_b64 s[30:31] 944; 945; GFX10-LABEL: v_rcp_v2f16: 946; GFX10: ; %bb.0: 947; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 948; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 949; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0 950; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v0 951; GFX10-NEXT: v_cvt_f32_f16_e32 v4, 1.0 952; GFX10-NEXT: v_cvt_f32_f16_e32 v2, v1 953; GFX10-NEXT: v_rcp_f32_e32 v3, v3 954; GFX10-NEXT: v_rcp_f32_e32 v2, v2 955; GFX10-NEXT: v_mul_f32_e32 v3, v4, v3 956; GFX10-NEXT: v_mul_f32_e32 v2, v4, v2 957; GFX10-NEXT: 
v_cvt_f16_f32_e32 v3, v3 958; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2 959; GFX10-NEXT: v_div_fixup_f16 v0, v3, v0, 1.0 960; GFX10-NEXT: v_div_fixup_f16 v1, v2, v1, 1.0 961; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 962; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v0, v1 963; GFX10-NEXT: s_setpc_b64 s[30:31] 964 %fdiv = fdiv <2 x half> <half 1.0, half 1.0>, %x 965 ret <2 x half> %fdiv 966} 967 968define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) { 969; GFX6-IEEE-LABEL: v_rcp_v2f16_arcp: 970; GFX6-IEEE: ; %bb.0: 971; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 972; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0 973; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 974; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 975; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 976; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 977; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2 978; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 979; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 980; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 981; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 982; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 983; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 984; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 985; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v2 986; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 987; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 988; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v1, v2 989; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 990; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 991; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 992; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 993; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 994; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 995; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 996; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 997; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 998; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 999; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] 1000; 1001; GFX6-FLUSH-LABEL: v_rcp_v2f16_arcp: 1002; GFX6-FLUSH: ; %bb.0: 
1003; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1004; GFX6-FLUSH-NEXT: s_movk_i32 s6, 0x3c00 1005; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, s6 1006; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 1007; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 1008; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 1009; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2 1010; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1011; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v3, v4, 1.0 1012; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4 1013; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4 1014; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v3, v6, v5 1015; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6 1016; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5 1017; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1018; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 1019; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s6 1020; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 1021; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2 1022; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 1023; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 1024; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4 1025; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 1026; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v1, v4 1027; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1028; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 1029; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 1030; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3 1031; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5 1032; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 1033; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5 1034; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1035; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v3, v6 1036; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4 1037; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 1038; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] 1039; 1040; GFX8-LABEL: v_rcp_v2f16_arcp: 1041; GFX8: ; %bb.0: 1042; GFX8-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1043; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 1044; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v0 1045; GFX8-NEXT: v_cvt_f32_f16_e32 v3, v2 1046; GFX8-NEXT: v_cvt_f32_f16_e32 v4, 1.0 1047; GFX8-NEXT: v_rcp_f32_e32 v1, v1 1048; GFX8-NEXT: v_rcp_f32_e32 v3, v3 1049; GFX8-NEXT: v_mul_f32_e32 v1, v4, v1 1050; GFX8-NEXT: v_mul_f32_e32 v3, v4, v3 1051; GFX8-NEXT: v_cvt_f16_f32_e32 v1, v1 1052; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v3 1053; GFX8-NEXT: v_div_fixup_f16 v0, v1, v0, 1.0 1054; GFX8-NEXT: v_div_fixup_f16 v1, v3, v2, 1.0 1055; GFX8-NEXT: v_mov_b32_e32 v2, 16 1056; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 1057; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1058; GFX8-NEXT: s_setpc_b64 s[30:31] 1059; 1060; GFX9-LABEL: v_rcp_v2f16_arcp: 1061; GFX9: ; %bb.0: 1062; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1063; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v0 1064; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v0 1065; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v2 1066; GFX9-NEXT: v_cvt_f32_f16_e32 v4, 1.0 1067; GFX9-NEXT: v_rcp_f32_e32 v1, v1 1068; GFX9-NEXT: v_rcp_f32_e32 v3, v3 1069; GFX9-NEXT: v_mul_f32_e32 v1, v4, v1 1070; GFX9-NEXT: v_mul_f32_e32 v3, v4, v3 1071; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 1072; GFX9-NEXT: v_cvt_f16_f32_e32 v3, v3 1073; GFX9-NEXT: v_div_fixup_f16 v0, v1, v0, 1.0 1074; GFX9-NEXT: v_div_fixup_f16 v1, v3, v2, 1.0 1075; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff 1076; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1077; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1 1078; GFX9-NEXT: s_setpc_b64 s[30:31] 1079; 1080; GFX10-LABEL: v_rcp_v2f16_arcp: 1081; GFX10: ; %bb.0: 1082; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1083; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1084; GFX10-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1085; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v0 1086; GFX10-NEXT: v_cvt_f32_f16_e32 v4, 1.0 1087; GFX10-NEXT: v_cvt_f32_f16_e32 
v2, v1 1088; GFX10-NEXT: v_rcp_f32_e32 v3, v3 1089; GFX10-NEXT: v_rcp_f32_e32 v2, v2 1090; GFX10-NEXT: v_mul_f32_e32 v3, v4, v3 1091; GFX10-NEXT: v_mul_f32_e32 v2, v4, v2 1092; GFX10-NEXT: v_cvt_f16_f32_e32 v3, v3 1093; GFX10-NEXT: v_cvt_f16_f32_e32 v2, v2 1094; GFX10-NEXT: v_div_fixup_f16 v0, v3, v0, 1.0 1095; GFX10-NEXT: v_div_fixup_f16 v1, v2, v1, 1.0 1096; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1097; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v0, v1 1098; GFX10-NEXT: s_setpc_b64 s[30:31] 1099 %fdiv = fdiv arcp <2 x half> <half 1.0, half 1.0>, %x 1100 ret <2 x half> %fdiv 1101} 1102 1103define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) { 1104; GFX6-LABEL: v_rcp_v2f16_arcp_afn: 1105; GFX6: ; %bb.0: 1106; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1107; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 1108; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 1109; GFX6-NEXT: v_cvt_f32_f16_e32 v2, 1.0 1110; GFX6-NEXT: v_rcp_f32_e32 v0, v0 1111; GFX6-NEXT: v_rcp_f32_e32 v1, v1 1112; GFX6-NEXT: v_mul_f32_e32 v0, v2, v0 1113; GFX6-NEXT: v_mul_f32_e32 v1, v2, v1 1114; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 1115; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 1116; GFX6-NEXT: s_setpc_b64 s[30:31] 1117; 1118; GFX8-LABEL: v_rcp_v2f16_arcp_afn: 1119; GFX8: ; %bb.0: 1120; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1121; GFX8-NEXT: v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1122; GFX8-NEXT: v_rcp_f16_e32 v0, v0 1123; GFX8-NEXT: v_mov_b32_e32 v2, 16 1124; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 1125; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1126; GFX8-NEXT: s_setpc_b64 s[30:31] 1127; 1128; GFX9-LABEL: v_rcp_v2f16_arcp_afn: 1129; GFX9: ; %bb.0: 1130; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1131; GFX9-NEXT: v_rcp_f16_e32 v1, v0 1132; GFX9-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 1133; GFX9-NEXT: 
v_mov_b32_e32 v2, 0xffff 1134; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 1135; GFX9-NEXT: s_setpc_b64 s[30:31] 1136; 1137; GFX10-LABEL: v_rcp_v2f16_arcp_afn: 1138; GFX10: ; %bb.0: 1139; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1140; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1141; GFX10-NEXT: v_rcp_f16_e32 v1, v0 1142; GFX10-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 1143; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 1144; GFX10-NEXT: s_setpc_b64 s[30:31] 1145 %fdiv = fdiv arcp afn <2 x half> <half 1.0, half 1.0>, %x 1146 ret <2 x half> %fdiv 1147} 1148 1149define <2 x half> @v_rcp_v2f16_ulp25(<2 x half> %x) { 1150; GFX6-IEEE-LABEL: v_rcp_v2f16_ulp25: 1151; GFX6-IEEE: ; %bb.0: 1152; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1153; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, 1.0 1154; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 1155; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 1156; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 1157; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 1158; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2 1159; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 1160; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 1161; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 1162; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 1163; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 1164; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 1165; GFX6-IEEE-NEXT: v_div_fmas_f32 v3, v3, v4, v6 1166; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v3, v0, v2 1167; GFX6-IEEE-NEXT: v_div_scale_f32 v3, s[4:5], v1, v1, v2 1168; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v3 1169; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v2, v1, v2 1170; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 1171; GFX6-IEEE-NEXT: v_fma_f32 v6, -v3, v4, 1.0 1172; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 1173; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 1174; GFX6-IEEE-NEXT: v_fma_f32 v7, -v3, v6, v5 1175; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 1176; GFX6-IEEE-NEXT: v_fma_f32 v3, -v3, v6, v5 1177; GFX6-IEEE-NEXT: 
v_div_fmas_f32 v3, v3, v4, v6 1178; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v3, v1, v2 1179; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 1180; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] 1181; 1182; GFX6-FLUSH-LABEL: v_rcp_v2f16_ulp25: 1183; GFX6-FLUSH: ; %bb.0: 1184; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1185; GFX6-FLUSH-NEXT: s_movk_i32 s6, 0x3c00 1186; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v2, s6 1187; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 1188; GFX6-FLUSH-NEXT: v_div_scale_f32 v3, s[4:5], v0, v0, v2 1189; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v3 1190; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v2, v0, v2 1191; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1192; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v3, v4, 1.0 1193; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4 1194; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4 1195; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v3, v6, v5 1196; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6 1197; GFX6-FLUSH-NEXT: v_fma_f32 v3, -v3, v6, v5 1198; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1199; GFX6-FLUSH-NEXT: v_div_fmas_f32 v3, v3, v4, v6 1200; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v4, s6 1201; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 1202; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v3, v0, v2 1203; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 1204; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 1205; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v1, v1, v4 1206; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v3, v2 1207; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v4, v1, v4 1208; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1209; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v3, 1.0 1210; GFX6-FLUSH-NEXT: v_fma_f32 v3, v6, v3, v3 1211; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v3 1212; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5 1213; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v3, v6 1214; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5 1215; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1216; GFX6-FLUSH-NEXT: 
v_div_fmas_f32 v2, v2, v3, v6 1217; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v1, v4 1218; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 1219; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] 1220; 1221; GFX8-LABEL: v_rcp_v2f16_ulp25: 1222; GFX8: ; %bb.0: 1223; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1224; GFX8-NEXT: v_rcp_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1225; GFX8-NEXT: v_rcp_f16_e32 v0, v0 1226; GFX8-NEXT: v_mov_b32_e32 v2, 16 1227; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 1228; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1229; GFX8-NEXT: s_setpc_b64 s[30:31] 1230; 1231; GFX9-LABEL: v_rcp_v2f16_ulp25: 1232; GFX9: ; %bb.0: 1233; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1234; GFX9-NEXT: v_rcp_f16_e32 v1, v0 1235; GFX9-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 1236; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff 1237; GFX9-NEXT: v_and_or_b32 v0, v1, v2, v0 1238; GFX9-NEXT: s_setpc_b64 s[30:31] 1239; 1240; GFX10-LABEL: v_rcp_v2f16_ulp25: 1241; GFX10: ; %bb.0: 1242; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1243; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1244; GFX10-NEXT: v_rcp_f16_e32 v1, v0 1245; GFX10-NEXT: v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 1246; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v1, v0 1247; GFX10-NEXT: s_setpc_b64 s[30:31] 1248 %fdiv = fdiv <2 x half> <half 1.0, half 1.0>, %x, !fpmath !0 1249 ret <2 x half> %fdiv 1250} 1251 1252define <2 x half> @v_fdiv_v2f16_afn_ulp25(<2 x half> %a, <2 x half> %b) { 1253; GFX6-LABEL: v_fdiv_v2f16_afn_ulp25: 1254; GFX6: ; %bb.0: 1255; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1256; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 1257; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 1258; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 1259; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 1260; GFX6-NEXT: v_rcp_f32_e32 v2, v2 
1261; GFX6-NEXT: v_rcp_f32_e32 v3, v3 1262; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 1263; GFX6-NEXT: v_mul_f32_e32 v1, v1, v3 1264; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 1265; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 1266; GFX6-NEXT: s_setpc_b64 s[30:31] 1267; 1268; GFX8-LABEL: v_fdiv_v2f16_afn_ulp25: 1269; GFX8: ; %bb.0: 1270; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1271; GFX8-NEXT: v_rcp_f16_e32 v2, v1 1272; GFX8-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1273; GFX8-NEXT: v_mul_f16_e32 v2, v0, v2 1274; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1275; GFX8-NEXT: v_mov_b32_e32 v1, 16 1276; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 1277; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1278; GFX8-NEXT: s_setpc_b64 s[30:31] 1279; 1280; GFX9-LABEL: v_fdiv_v2f16_afn_ulp25: 1281; GFX9: ; %bb.0: 1282; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1283; GFX9-NEXT: v_rcp_f16_e32 v2, v1 1284; GFX9-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1285; GFX9-NEXT: v_mul_f16_e32 v2, v0, v2 1286; GFX9-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1287; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff 1288; GFX9-NEXT: v_and_or_b32 v0, v2, v1, v0 1289; GFX9-NEXT: s_setpc_b64 s[30:31] 1290; 1291; GFX10-LABEL: v_fdiv_v2f16_afn_ulp25: 1292; GFX10: ; %bb.0: 1293; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1294; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1295; GFX10-NEXT: v_rcp_f16_e32 v2, v1 1296; GFX10-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1297; GFX10-NEXT: v_mul_f16_e32 v2, v0, v2 1298; GFX10-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1299; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v2, v0 1300; 
GFX10-NEXT: s_setpc_b64 s[30:31] 1301 %fdiv = fdiv afn <2 x half> %a, %b, !fpmath !0 1302 ret <2 x half> %fdiv 1303} 1304 1305define <2 x half> @v_fdiv_v2f16_arcp_ulp25(<2 x half> %a, <2 x half> %b) { 1306; GFX6-IEEE-LABEL: v_fdiv_v2f16_arcp_ulp25: 1307; GFX6-IEEE: ; %bb.0: 1308; GFX6-IEEE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1309; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v0, v0 1310; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v2, v2 1311; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v1, v1 1312; GFX6-IEEE-NEXT: v_cvt_f32_f16_e32 v3, v3 1313; GFX6-IEEE-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0 1314; GFX6-IEEE-NEXT: v_rcp_f32_e32 v5, v4 1315; GFX6-IEEE-NEXT: v_div_scale_f32 v6, vcc, v0, v2, v0 1316; GFX6-IEEE-NEXT: v_fma_f32 v7, -v4, v5, 1.0 1317; GFX6-IEEE-NEXT: v_fma_f32 v5, v7, v5, v5 1318; GFX6-IEEE-NEXT: v_mul_f32_e32 v7, v6, v5 1319; GFX6-IEEE-NEXT: v_fma_f32 v8, -v4, v7, v6 1320; GFX6-IEEE-NEXT: v_fma_f32 v7, v8, v5, v7 1321; GFX6-IEEE-NEXT: v_fma_f32 v4, -v4, v7, v6 1322; GFX6-IEEE-NEXT: v_div_fmas_f32 v4, v4, v5, v7 1323; GFX6-IEEE-NEXT: v_div_fixup_f32 v0, v4, v2, v0 1324; GFX6-IEEE-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1 1325; GFX6-IEEE-NEXT: v_rcp_f32_e32 v4, v2 1326; GFX6-IEEE-NEXT: v_div_scale_f32 v5, vcc, v1, v3, v1 1327; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v0, v0 1328; GFX6-IEEE-NEXT: v_fma_f32 v6, -v2, v4, 1.0 1329; GFX6-IEEE-NEXT: v_fma_f32 v4, v6, v4, v4 1330; GFX6-IEEE-NEXT: v_mul_f32_e32 v6, v5, v4 1331; GFX6-IEEE-NEXT: v_fma_f32 v7, -v2, v6, v5 1332; GFX6-IEEE-NEXT: v_fma_f32 v6, v7, v4, v6 1333; GFX6-IEEE-NEXT: v_fma_f32 v2, -v2, v6, v5 1334; GFX6-IEEE-NEXT: v_div_fmas_f32 v2, v2, v4, v6 1335; GFX6-IEEE-NEXT: v_div_fixup_f32 v1, v2, v3, v1 1336; GFX6-IEEE-NEXT: v_cvt_f16_f32_e32 v1, v1 1337; GFX6-IEEE-NEXT: s_setpc_b64 s[30:31] 1338; 1339; GFX6-FLUSH-LABEL: v_fdiv_v2f16_arcp_ulp25: 1340; GFX6-FLUSH: ; %bb.0: 1341; GFX6-FLUSH-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1342; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v0, v0 1343; GFX6-FLUSH-NEXT: 
v_cvt_f32_f16_e32 v2, v2 1344; GFX6-FLUSH-NEXT: v_div_scale_f32 v4, s[4:5], v2, v2, v0 1345; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v5, v4 1346; GFX6-FLUSH-NEXT: v_div_scale_f32 v6, vcc, v0, v2, v0 1347; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1348; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v4, v5, 1.0 1349; GFX6-FLUSH-NEXT: v_fma_f32 v5, v7, v5, v5 1350; GFX6-FLUSH-NEXT: v_mul_f32_e32 v7, v6, v5 1351; GFX6-FLUSH-NEXT: v_fma_f32 v8, -v4, v7, v6 1352; GFX6-FLUSH-NEXT: v_fma_f32 v7, v8, v5, v7 1353; GFX6-FLUSH-NEXT: v_fma_f32 v4, -v4, v7, v6 1354; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1355; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v1, v1 1356; GFX6-FLUSH-NEXT: v_cvt_f32_f16_e32 v3, v3 1357; GFX6-FLUSH-NEXT: v_div_fmas_f32 v4, v4, v5, v7 1358; GFX6-FLUSH-NEXT: v_div_fixup_f32 v0, v4, v2, v0 1359; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 0 1360; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v0, v0 1361; GFX6-FLUSH-NEXT: v_div_scale_f32 v2, s[4:5], v3, v3, v1 1362; GFX6-FLUSH-NEXT: v_rcp_f32_e32 v4, v2 1363; GFX6-FLUSH-NEXT: v_div_scale_f32 v5, vcc, v1, v3, v1 1364; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 1365; GFX6-FLUSH-NEXT: v_fma_f32 v6, -v2, v4, 1.0 1366; GFX6-FLUSH-NEXT: v_fma_f32 v4, v6, v4, v4 1367; GFX6-FLUSH-NEXT: v_mul_f32_e32 v6, v5, v4 1368; GFX6-FLUSH-NEXT: v_fma_f32 v7, -v2, v6, v5 1369; GFX6-FLUSH-NEXT: v_fma_f32 v6, v7, v4, v6 1370; GFX6-FLUSH-NEXT: v_fma_f32 v2, -v2, v6, v5 1371; GFX6-FLUSH-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 1372; GFX6-FLUSH-NEXT: v_div_fmas_f32 v2, v2, v4, v6 1373; GFX6-FLUSH-NEXT: v_div_fixup_f32 v1, v2, v3, v1 1374; GFX6-FLUSH-NEXT: v_cvt_f16_f32_e32 v1, v1 1375; GFX6-FLUSH-NEXT: s_setpc_b64 s[30:31] 1376; 1377; GFX8-LABEL: v_fdiv_v2f16_arcp_ulp25: 1378; GFX8: ; %bb.0: 1379; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1380; GFX8-NEXT: v_lshrrev_b32_e32 v4, 16, v1 1381; GFX8-NEXT: v_cvt_f32_f16_e32 v2, v1 1382; GFX8-NEXT: v_cvt_f32_f16_e32 v5, v4 
1383; GFX8-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1384; GFX8-NEXT: v_cvt_f32_f16_e32 v3, v0 1385; GFX8-NEXT: v_rcp_f32_e32 v2, v2 1386; GFX8-NEXT: v_cvt_f32_f16_e32 v7, v6 1387; GFX8-NEXT: v_rcp_f32_e32 v5, v5 1388; GFX8-NEXT: v_mul_f32_e32 v2, v3, v2 1389; GFX8-NEXT: v_cvt_f16_f32_e32 v2, v2 1390; GFX8-NEXT: v_mul_f32_e32 v3, v7, v5 1391; GFX8-NEXT: v_cvt_f16_f32_e32 v3, v3 1392; GFX8-NEXT: v_div_fixup_f16 v0, v2, v1, v0 1393; GFX8-NEXT: v_mov_b32_e32 v2, 16 1394; GFX8-NEXT: v_div_fixup_f16 v1, v3, v4, v6 1395; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 1396; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 1397; GFX8-NEXT: s_setpc_b64 s[30:31] 1398; 1399; GFX9-LABEL: v_fdiv_v2f16_arcp_ulp25: 1400; GFX9: ; %bb.0: 1401; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1402; GFX9-NEXT: v_lshrrev_b32_e32 v4, 16, v1 1403; GFX9-NEXT: v_cvt_f32_f16_e32 v2, v1 1404; GFX9-NEXT: v_cvt_f32_f16_e32 v5, v4 1405; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1406; GFX9-NEXT: v_cvt_f32_f16_e32 v3, v0 1407; GFX9-NEXT: v_rcp_f32_e32 v2, v2 1408; GFX9-NEXT: v_cvt_f32_f16_e32 v7, v6 1409; GFX9-NEXT: v_rcp_f32_e32 v5, v5 1410; GFX9-NEXT: v_mul_f32_e32 v2, v3, v2 1411; GFX9-NEXT: v_cvt_f16_f32_e32 v2, v2 1412; GFX9-NEXT: v_mul_f32_e32 v3, v7, v5 1413; GFX9-NEXT: v_cvt_f16_f32_e32 v3, v3 1414; GFX9-NEXT: v_div_fixup_f16 v0, v2, v1, v0 1415; GFX9-NEXT: v_mov_b32_e32 v2, 0xffff 1416; GFX9-NEXT: v_div_fixup_f16 v1, v3, v4, v6 1417; GFX9-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1418; GFX9-NEXT: v_and_or_b32 v0, v0, v2, v1 1419; GFX9-NEXT: s_setpc_b64 s[30:31] 1420; 1421; GFX10-LABEL: v_fdiv_v2f16_arcp_ulp25: 1422; GFX10: ; %bb.0: 1423; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1424; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1425; GFX10-NEXT: v_lshrrev_b32_e32 v2, 16, v1 1426; GFX10-NEXT: v_cvt_f32_f16_e32 v4, v1 1427; GFX10-NEXT: v_lshrrev_b32_e32 v5, 16, v0 1428; GFX10-NEXT: 
v_cvt_f32_f16_e32 v7, v0 1429; GFX10-NEXT: v_cvt_f32_f16_e32 v3, v2 1430; GFX10-NEXT: v_rcp_f32_e32 v4, v4 1431; GFX10-NEXT: v_cvt_f32_f16_e32 v6, v5 1432; GFX10-NEXT: v_rcp_f32_e32 v3, v3 1433; GFX10-NEXT: v_mul_f32_e32 v4, v7, v4 1434; GFX10-NEXT: v_mul_f32_e32 v3, v6, v3 1435; GFX10-NEXT: v_cvt_f16_f32_e32 v4, v4 1436; GFX10-NEXT: v_cvt_f16_f32_e32 v3, v3 1437; GFX10-NEXT: v_div_fixup_f16 v0, v4, v1, v0 1438; GFX10-NEXT: v_div_fixup_f16 v2, v3, v2, v5 1439; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v2 1440; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v0, v1 1441; GFX10-NEXT: s_setpc_b64 s[30:31] 1442 %fdiv = fdiv arcp <2 x half> %a, %b, !fpmath !0 1443 ret <2 x half> %fdiv 1444} 1445 1446define <2 x half> @v_fdiv_v2f16_arcp_afn_ulp25(<2 x half> %a, <2 x half> %b) { 1447; GFX6-LABEL: v_fdiv_v2f16_arcp_afn_ulp25: 1448; GFX6: ; %bb.0: 1449; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1450; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 1451; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 1452; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 1453; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 1454; GFX6-NEXT: v_rcp_f32_e32 v2, v2 1455; GFX6-NEXT: v_rcp_f32_e32 v3, v3 1456; GFX6-NEXT: v_mul_f32_e32 v0, v0, v2 1457; GFX6-NEXT: v_mul_f32_e32 v1, v1, v3 1458; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 1459; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 1460; GFX6-NEXT: s_setpc_b64 s[30:31] 1461; 1462; GFX8-LABEL: v_fdiv_v2f16_arcp_afn_ulp25: 1463; GFX8: ; %bb.0: 1464; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1465; GFX8-NEXT: v_rcp_f16_e32 v2, v1 1466; GFX8-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1467; GFX8-NEXT: v_mul_f16_e32 v2, v0, v2 1468; GFX8-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1469; GFX8-NEXT: v_mov_b32_e32 v1, 16 1470; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 1471; GFX8-NEXT: v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_0 src1_sel:DWORD 1472; GFX8-NEXT: s_setpc_b64 s[30:31] 1473; 1474; GFX9-LABEL: v_fdiv_v2f16_arcp_afn_ulp25: 1475; GFX9: ; %bb.0: 1476; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1477; GFX9-NEXT: v_rcp_f16_e32 v2, v1 1478; GFX9-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1479; GFX9-NEXT: v_mul_f16_e32 v2, v0, v2 1480; GFX9-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1481; GFX9-NEXT: v_mov_b32_e32 v1, 0xffff 1482; GFX9-NEXT: v_and_or_b32 v0, v2, v1, v0 1483; GFX9-NEXT: s_setpc_b64 s[30:31] 1484; 1485; GFX10-LABEL: v_fdiv_v2f16_arcp_afn_ulp25: 1486; GFX10: ; %bb.0: 1487; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1488; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1489; GFX10-NEXT: v_rcp_f16_e32 v2, v1 1490; GFX10-NEXT: v_rcp_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1491; GFX10-NEXT: v_mul_f16_e32 v2, v0, v2 1492; GFX10-NEXT: v_mul_f16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1493; GFX10-NEXT: v_and_or_b32 v0, 0xffff, v2, v0 1494; GFX10-NEXT: s_setpc_b64 s[30:31] 1495 %fdiv = fdiv afn arcp <2 x half> %a, %b, !fpmath !0 1496 ret <2 x half> %fdiv 1497} 1498 1499!0 = !{float 2.500000e+00} 1500