1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s 3; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX6 %s 4 5; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s 6; RUN: llc -global-isel -march=amdgcn -mcpu=fiji -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8 %s 7 8; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s 9; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s 10 11; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=ieee -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s 12; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -denormal-fp-math=preserve-sign -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s 13 14define double @v_fdiv_f64(double %a, double %b) { 15; GFX6-LABEL: v_fdiv_f64: 16; GFX6: ; %bb.0: 17; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] 19; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1] 20; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 21; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v5 22; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v11 23; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 24; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 25; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 26; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 27; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 28; GFX6-NEXT: v_mul_f64 v[8:9], v[10:11], v[6:7] 
29; GFX6-NEXT: v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11] 30; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9] 31; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 32; GFX6-NEXT: s_setpc_b64 s[30:31] 33; 34; GFX8-LABEL: v_fdiv_f64: 35; GFX8: ; %bb.0: 36; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 37; GFX8-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] 38; GFX8-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 39; GFX8-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 40; GFX8-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 41; GFX8-NEXT: v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1] 42; GFX8-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0 43; GFX8-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] 44; GFX8-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] 45; GFX8-NEXT: v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9] 46; GFX8-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11] 47; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 48; GFX8-NEXT: s_setpc_b64 s[30:31] 49; 50; GFX9-LABEL: v_fdiv_f64: 51; GFX9: ; %bb.0: 52; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 53; GFX9-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] 54; GFX9-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 55; GFX9-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 56; GFX9-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 57; GFX9-NEXT: v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1] 58; GFX9-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0 59; GFX9-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] 60; GFX9-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] 61; GFX9-NEXT: v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9] 62; GFX9-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11] 63; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 64; GFX9-NEXT: s_setpc_b64 s[30:31] 65; 66; GFX10-LABEL: v_fdiv_f64: 67; GFX10: ; %bb.0: 68; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 69; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 70; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] 
71; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] 72; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 73; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 74; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 75; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 76; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 77; GFX10-NEXT: v_mul_f64 v[8:9], v[10:11], v[6:7] 78; GFX10-NEXT: v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11] 79; GFX10-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9] 80; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 81; GFX10-NEXT: s_setpc_b64 s[30:31] 82 %fdiv = fdiv double %a, %b 83 ret double %fdiv 84} 85 86define double @v_fdiv_f64_afn(double %a, double %b) { 87; GCN-LABEL: v_fdiv_f64_afn: 88; GCN: ; %bb.0: 89; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 90; GCN-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 91; GCN-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 92; GCN-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] 93; GCN-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 94; GCN-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] 95; GCN-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5] 96; GCN-NEXT: v_fma_f64 v[0:1], -v[2:3], v[6:7], v[0:1] 97; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[6:7] 98; GCN-NEXT: s_setpc_b64 s[30:31] 99; 100; GFX10-LABEL: v_fdiv_f64_afn: 101; GFX10: ; %bb.0: 102; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 103; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 104; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 105; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 106; GFX10-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] 107; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 108; GFX10-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] 109; GFX10-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5] 110; GFX10-NEXT: v_fma_f64 v[0:1], -v[2:3], v[6:7], v[0:1] 111; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[6:7] 112; GFX10-NEXT: s_setpc_b64 s[30:31] 113 %fdiv = fdiv afn double %a, %b 114 ret double %fdiv 115} 116 117define 
double @v_fdiv_f64_ulp25(double %a, double %b) { 118; GFX6-LABEL: v_fdiv_f64_ulp25: 119; GFX6: ; %bb.0: 120; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 121; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] 122; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1] 123; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 124; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v5 125; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v11 126; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 127; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 128; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 129; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 130; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 131; GFX6-NEXT: v_mul_f64 v[8:9], v[10:11], v[6:7] 132; GFX6-NEXT: v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11] 133; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9] 134; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 135; GFX6-NEXT: s_setpc_b64 s[30:31] 136; 137; GFX8-LABEL: v_fdiv_f64_ulp25: 138; GFX8: ; %bb.0: 139; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 140; GFX8-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] 141; GFX8-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 142; GFX8-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 143; GFX8-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 144; GFX8-NEXT: v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1] 145; GFX8-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0 146; GFX8-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] 147; GFX8-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] 148; GFX8-NEXT: v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9] 149; GFX8-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11] 150; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 151; GFX8-NEXT: s_setpc_b64 s[30:31] 152; 153; GFX9-LABEL: v_fdiv_f64_ulp25: 154; GFX9: ; %bb.0: 155; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 156; GFX9-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] 157; GFX9-NEXT: 
v_rcp_f64_e32 v[6:7], v[4:5] 158; GFX9-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 159; GFX9-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 160; GFX9-NEXT: v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1] 161; GFX9-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0 162; GFX9-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] 163; GFX9-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] 164; GFX9-NEXT: v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9] 165; GFX9-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11] 166; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 167; GFX9-NEXT: s_setpc_b64 s[30:31] 168; 169; GFX10-LABEL: v_fdiv_f64_ulp25: 170; GFX10: ; %bb.0: 171; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 172; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 173; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] 174; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] 175; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 176; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 177; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 178; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 179; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 180; GFX10-NEXT: v_mul_f64 v[8:9], v[10:11], v[6:7] 181; GFX10-NEXT: v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11] 182; GFX10-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9] 183; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 184; GFX10-NEXT: s_setpc_b64 s[30:31] 185 %fdiv = fdiv double %a, %b, !fpmath !0 186 ret double %fdiv 187} 188 189define double @v_rcp_f64(double %x) { 190; GFX6-LABEL: v_rcp_f64: 191; GFX6: ; %bb.0: 192; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 193; GFX6-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 194; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 195; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 196; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 197; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 198; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 199; 
GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 200; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 201; GFX6-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 202; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 203; GFX6-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 204; GFX6-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5] 205; GFX6-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9] 206; GFX6-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7] 207; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 208; GFX6-NEXT: s_setpc_b64 s[30:31] 209; 210; GFX8-LABEL: v_rcp_f64: 211; GFX8: ; %bb.0: 212; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 213; GFX8-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 214; GFX8-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 215; GFX8-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 216; GFX8-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 217; GFX8-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0 218; GFX8-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0 219; GFX8-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] 220; GFX8-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5] 221; GFX8-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7] 222; GFX8-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9] 223; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 224; GFX8-NEXT: s_setpc_b64 s[30:31] 225; 226; GFX9-LABEL: v_rcp_f64: 227; GFX9: ; %bb.0: 228; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 229; GFX9-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 230; GFX9-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 231; GFX9-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 232; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 233; GFX9-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0 234; GFX9-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0 235; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] 236; GFX9-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5] 237; GFX9-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7] 238; GFX9-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9] 239; GFX9-NEXT: 
v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 240; GFX9-NEXT: s_setpc_b64 s[30:31] 241; 242; GFX10-LABEL: v_rcp_f64: 243; GFX10: ; %bb.0: 244; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 245; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 246; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 247; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 248; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 249; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 250; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 251; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 252; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 253; GFX10-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5] 254; GFX10-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9] 255; GFX10-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7] 256; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 257; GFX10-NEXT: s_setpc_b64 s[30:31] 258 %fdiv = fdiv double 1.0, %x 259 ret double %fdiv 260} 261 262define double @v_rcp_f64_arcp(double %x) { 263; GFX6-LABEL: v_rcp_f64_arcp: 264; GFX6: ; %bb.0: 265; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 266; GFX6-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 267; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 268; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 269; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 270; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 271; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 272; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 273; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 274; GFX6-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 275; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 276; GFX6-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 277; GFX6-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5] 278; GFX6-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9] 279; GFX6-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7] 280; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 281; GFX6-NEXT: s_setpc_b64 s[30:31] 282; 283; 
GFX8-LABEL: v_rcp_f64_arcp: 284; GFX8: ; %bb.0: 285; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 286; GFX8-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 287; GFX8-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 288; GFX8-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 289; GFX8-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 290; GFX8-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0 291; GFX8-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0 292; GFX8-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] 293; GFX8-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5] 294; GFX8-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7] 295; GFX8-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9] 296; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 297; GFX8-NEXT: s_setpc_b64 s[30:31] 298; 299; GFX9-LABEL: v_rcp_f64_arcp: 300; GFX9: ; %bb.0: 301; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 302; GFX9-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 303; GFX9-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 304; GFX9-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 305; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 306; GFX9-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0 307; GFX9-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0 308; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] 309; GFX9-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5] 310; GFX9-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7] 311; GFX9-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9] 312; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 313; GFX9-NEXT: s_setpc_b64 s[30:31] 314; 315; GFX10-LABEL: v_rcp_f64_arcp: 316; GFX10: ; %bb.0: 317; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 318; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 319; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 320; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 321; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 322; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 323; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 
324; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 325; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 326; GFX10-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5] 327; GFX10-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9] 328; GFX10-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7] 329; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 330; GFX10-NEXT: s_setpc_b64 s[30:31] 331 %fdiv = fdiv arcp double 1.0, %x 332 ret double %fdiv 333} 334 335define double @v_rcp_f64_arcp_afn(double %x) { 336; GCN-LABEL: v_rcp_f64_arcp_afn: 337; GCN: ; %bb.0: 338; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 339; GCN-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] 340; GCN-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 341; GCN-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3] 342; GCN-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 343; GCN-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3] 344; GCN-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3] 345; GCN-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0 346; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] 347; GCN-NEXT: s_setpc_b64 s[30:31] 348; 349; GFX10-LABEL: v_rcp_f64_arcp_afn: 350; GFX10: ; %bb.0: 351; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 352; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 353; GFX10-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] 354; GFX10-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 355; GFX10-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3] 356; GFX10-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 357; GFX10-NEXT: v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3] 358; GFX10-NEXT: v_mul_f64 v[4:5], 1.0, v[2:3] 359; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0 360; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] 361; GFX10-NEXT: s_setpc_b64 s[30:31] 362 %fdiv = fdiv arcp afn double 1.0, %x 363 ret double %fdiv 364} 365 366define double @v_rcp_f64_ulp25(double %x) { 367; GFX6-LABEL: v_rcp_f64_ulp25: 368; GFX6: ; %bb.0: 369; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 370; GFX6-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 
371; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], 1.0, v[0:1], 1.0 372; GFX6-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 373; GFX6-NEXT: v_mov_b32_e32 v10, 0x3ff00000 374; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v10, v9 375; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v3 376; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 377; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 378; GFX6-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 379; GFX6-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 380; GFX6-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 381; GFX6-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5] 382; GFX6-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9] 383; GFX6-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7] 384; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 385; GFX6-NEXT: s_setpc_b64 s[30:31] 386; 387; GFX8-LABEL: v_rcp_f64_ulp25: 388; GFX8: ; %bb.0: 389; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 390; GFX8-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 391; GFX8-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 392; GFX8-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 393; GFX8-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 394; GFX8-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0 395; GFX8-NEXT: v_fma_f64 v[8:9], -v[2:3], v[4:5], 1.0 396; GFX8-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] 397; GFX8-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5] 398; GFX8-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7] 399; GFX8-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9] 400; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 401; GFX8-NEXT: s_setpc_b64 s[30:31] 402; 403; GFX9-LABEL: v_rcp_f64_ulp25: 404; GFX9: ; %bb.0: 405; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 406; GFX9-NEXT: v_div_scale_f64 v[2:3], s[4:5], v[0:1], v[0:1], 1.0 407; GFX9-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 408; GFX9-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 409; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 410; GFX9-NEXT: v_div_scale_f64 v[6:7], vcc, 1.0, v[0:1], 1.0 411; GFX9-NEXT: v_fma_f64 v[8:9], 
-v[2:3], v[4:5], 1.0 412; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[8:9], v[4:5] 413; GFX9-NEXT: v_mul_f64 v[8:9], v[6:7], v[4:5] 414; GFX9-NEXT: v_fma_f64 v[2:3], -v[2:3], v[8:9], v[6:7] 415; GFX9-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[8:9] 416; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 417; GFX9-NEXT: s_setpc_b64 s[30:31] 418; 419; GFX10-LABEL: v_rcp_f64_ulp25: 420; GFX10: ; %bb.0: 421; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 422; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 423; GFX10-NEXT: v_div_scale_f64 v[2:3], s4, v[0:1], v[0:1], 1.0 424; GFX10-NEXT: v_div_scale_f64 v[8:9], vcc_lo, 1.0, v[0:1], 1.0 425; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 426; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 427; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 428; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 429; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[6:7], v[4:5] 430; GFX10-NEXT: v_mul_f64 v[6:7], v[8:9], v[4:5] 431; GFX10-NEXT: v_fma_f64 v[2:3], -v[2:3], v[6:7], v[8:9] 432; GFX10-NEXT: v_div_fmas_f64 v[2:3], v[2:3], v[4:5], v[6:7] 433; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[2:3], v[0:1], 1.0 434; GFX10-NEXT: s_setpc_b64 s[30:31] 435 %fdiv = fdiv double 1.0, %x, !fpmath !0 436 ret double %fdiv 437} 438 439define double @v_fdiv_f64_afn_ulp25(double %a, double %b) { 440; GCN-LABEL: v_fdiv_f64_afn_ulp25: 441; GCN: ; %bb.0: 442; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 443; GCN-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 444; GCN-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 445; GCN-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] 446; GCN-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 447; GCN-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] 448; GCN-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5] 449; GCN-NEXT: v_fma_f64 v[0:1], -v[2:3], v[6:7], v[0:1] 450; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[6:7] 451; GCN-NEXT: s_setpc_b64 s[30:31] 452; 453; GFX10-LABEL: v_fdiv_f64_afn_ulp25: 454; GFX10: ; %bb.0: 455; GFX10-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 456; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 457; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[2:3] 458; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 459; GFX10-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] 460; GFX10-NEXT: v_fma_f64 v[6:7], -v[2:3], v[4:5], 1.0 461; GFX10-NEXT: v_fma_f64 v[4:5], v[6:7], v[4:5], v[4:5] 462; GFX10-NEXT: v_mul_f64 v[6:7], v[0:1], v[4:5] 463; GFX10-NEXT: v_fma_f64 v[0:1], -v[2:3], v[6:7], v[0:1] 464; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[6:7] 465; GFX10-NEXT: s_setpc_b64 s[30:31] 466 %fdiv = fdiv afn double %a, %b, !fpmath !0 467 ret double %fdiv 468} 469 470define double @v_fdiv_f64_arcp_ulp25(double %a, double %b) { 471; GFX6-LABEL: v_fdiv_f64_arcp_ulp25: 472; GFX6: ; %bb.0: 473; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 474; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] 475; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[0:1], v[2:3], v[0:1] 476; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 477; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v5 478; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v11 479; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 480; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 481; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 482; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 483; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 484; GFX6-NEXT: v_mul_f64 v[8:9], v[10:11], v[6:7] 485; GFX6-NEXT: v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11] 486; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[8:9] 487; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 488; GFX6-NEXT: s_setpc_b64 s[30:31] 489; 490; GFX8-LABEL: v_fdiv_f64_arcp_ulp25: 491; GFX8: ; %bb.0: 492; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 493; GFX8-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] 494; GFX8-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 495; GFX8-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 496; GFX8-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 497; GFX8-NEXT: 
v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1] 498; GFX8-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0 499; GFX8-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] 500; GFX8-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] 501; GFX8-NEXT: v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9] 502; GFX8-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11] 503; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 504; GFX8-NEXT: s_setpc_b64 s[30:31] 505; 506; GFX9-LABEL: v_fdiv_f64_arcp_ulp25: 507; GFX9: ; %bb.0: 508; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 509; GFX9-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[2:3], v[2:3], v[0:1] 510; GFX9-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 511; GFX9-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 512; GFX9-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 513; GFX9-NEXT: v_div_scale_f64 v[8:9], vcc, v[0:1], v[2:3], v[0:1] 514; GFX9-NEXT: v_fma_f64 v[10:11], -v[4:5], v[6:7], 1.0 515; GFX9-NEXT: v_fma_f64 v[6:7], v[6:7], v[10:11], v[6:7] 516; GFX9-NEXT: v_mul_f64 v[10:11], v[8:9], v[6:7] 517; GFX9-NEXT: v_fma_f64 v[4:5], -v[4:5], v[10:11], v[8:9] 518; GFX9-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[6:7], v[10:11] 519; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 520; GFX9-NEXT: s_setpc_b64 s[30:31] 521; 522; GFX10-LABEL: v_fdiv_f64_arcp_ulp25: 523; GFX10: ; %bb.0: 524; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 525; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 526; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[2:3], v[2:3], v[0:1] 527; GFX10-NEXT: v_div_scale_f64 v[10:11], vcc_lo, v[0:1], v[2:3], v[0:1] 528; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 529; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 530; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 531; GFX10-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 532; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 533; GFX10-NEXT: v_mul_f64 v[8:9], v[10:11], v[6:7] 534; GFX10-NEXT: v_fma_f64 v[4:5], -v[4:5], v[8:9], v[10:11] 535; GFX10-NEXT: v_div_fmas_f64 v[4:5], v[4:5], 
v[6:7], v[8:9] 536; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[2:3], v[0:1] 537; GFX10-NEXT: s_setpc_b64 s[30:31] 538 %fdiv = fdiv arcp double %a, %b, !fpmath !0 539 ret double %fdiv 540} 541 542define <2 x double> @v_fdiv_v2f64(<2 x double> %a, <2 x double> %b) { 543; GFX6-LABEL: v_fdiv_v2f64: 544; GFX6: ; %bb.0: 545; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 546; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1] 547; GFX6-NEXT: v_div_scale_f64 v[14:15], s[4:5], v[6:7], v[6:7], v[2:3] 548; GFX6-NEXT: v_rcp_f64_e32 v[10:11], v[8:9] 549; GFX6-NEXT: v_div_scale_f64 v[18:19], s[4:5], v[0:1], v[4:5], v[0:1] 550; GFX6-NEXT: v_rcp_f64_e32 v[16:17], v[14:15] 551; GFX6-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 552; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v19 553; GFX6-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] 554; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v9 555; GFX6-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 556; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 557; GFX6-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] 558; GFX6-NEXT: v_fma_f64 v[12:13], -v[14:15], v[16:17], 1.0 559; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v15 560; GFX6-NEXT: v_fma_f64 v[12:13], v[16:17], v[12:13], v[16:17] 561; GFX6-NEXT: v_mul_f64 v[16:17], v[18:19], v[10:11] 562; GFX6-NEXT: v_fma_f64 v[18:19], -v[8:9], v[16:17], v[18:19] 563; GFX6-NEXT: v_fma_f64 v[8:9], -v[14:15], v[12:13], 1.0 564; GFX6-NEXT: v_div_fmas_f64 v[10:11], v[18:19], v[10:11], v[16:17] 565; GFX6-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13] 566; GFX6-NEXT: v_div_scale_f64 v[12:13], s[6:7], v[2:3], v[6:7], v[2:3] 567; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[10:11], v[4:5], v[0:1] 568; GFX6-NEXT: v_mul_f64 v[16:17], v[12:13], v[8:9] 569; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v3, v13 570; GFX6-NEXT: v_fma_f64 v[18:19], -v[14:15], v[16:17], v[12:13] 571; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 572; GFX6-NEXT: s_nop 1 573; GFX6-NEXT: v_div_fmas_f64 v[8:9], v[18:19], v[8:9], v[16:17] 574; 
GFX6-NEXT: v_div_fixup_f64 v[2:3], v[8:9], v[6:7], v[2:3] 575; GFX6-NEXT: s_setpc_b64 s[30:31] 576; 577; GFX8-LABEL: v_fdiv_v2f64: 578; GFX8: ; %bb.0: 579; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 580; GFX8-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1] 581; GFX8-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3] 582; GFX8-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] 583; GFX8-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] 584; GFX8-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 585; GFX8-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0 586; GFX8-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 587; GFX8-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15] 588; GFX8-NEXT: v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1] 589; GFX8-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 590; GFX8-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 591; GFX8-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 592; GFX8-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] 593; GFX8-NEXT: v_mul_f64 v[16:17], v[18:19], v[12:13] 594; GFX8-NEXT: v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19] 595; GFX8-NEXT: v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3] 596; GFX8-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17] 597; GFX8-NEXT: s_mov_b64 vcc, s[4:5] 598; GFX8-NEXT: v_mul_f64 v[20:21], v[18:19], v[14:15] 599; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1] 600; GFX8-NEXT: v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19] 601; GFX8-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21] 602; GFX8-NEXT: v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3] 603; GFX8-NEXT: s_setpc_b64 s[30:31] 604; 605; GFX9-LABEL: v_fdiv_v2f64: 606; GFX9: ; %bb.0: 607; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 608; GFX9-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1] 609; GFX9-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3] 610; GFX9-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] 611; GFX9-NEXT: v_rcp_f64_e32 
v[14:15], v[10:11] 612; GFX9-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 613; GFX9-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0 614; GFX9-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 615; GFX9-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15] 616; GFX9-NEXT: v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1] 617; GFX9-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 618; GFX9-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 619; GFX9-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 620; GFX9-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] 621; GFX9-NEXT: v_mul_f64 v[16:17], v[18:19], v[12:13] 622; GFX9-NEXT: v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19] 623; GFX9-NEXT: v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3] 624; GFX9-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17] 625; GFX9-NEXT: s_mov_b64 vcc, s[4:5] 626; GFX9-NEXT: v_mul_f64 v[20:21], v[18:19], v[14:15] 627; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1] 628; GFX9-NEXT: v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19] 629; GFX9-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21] 630; GFX9-NEXT: v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3] 631; GFX9-NEXT: s_setpc_b64 s[30:31] 632; 633; GFX10-LABEL: v_fdiv_v2f64: 634; GFX10: ; %bb.0: 635; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 636; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 637; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] 638; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] 639; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] 640; GFX10-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] 641; GFX10-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] 642; GFX10-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 643; GFX10-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0 644; GFX10-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 645; GFX10-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15] 646; GFX10-NEXT: v_fma_f64 v[16:17], 
-v[8:9], v[12:13], 1.0 647; GFX10-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0 648; GFX10-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 649; GFX10-NEXT: v_div_scale_f64 v[16:17], s4, v[2:3], v[6:7], v[2:3] 650; GFX10-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15] 651; GFX10-NEXT: v_mul_f64 v[18:19], v[20:21], v[12:13] 652; GFX10-NEXT: v_mul_f64 v[22:23], v[16:17], v[14:15] 653; GFX10-NEXT: v_fma_f64 v[8:9], -v[8:9], v[18:19], v[20:21] 654; GFX10-NEXT: v_fma_f64 v[10:11], -v[10:11], v[22:23], v[16:17] 655; GFX10-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[18:19] 656; GFX10-NEXT: s_mov_b32 vcc_lo, s4 657; GFX10-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[22:23] 658; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1] 659; GFX10-NEXT: v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3] 660; GFX10-NEXT: s_setpc_b64 s[30:31] 661 %fdiv = fdiv <2 x double> %a, %b 662 ret <2 x double> %fdiv 663} 664 665define <2 x double> @v_fdiv_v2f64_afn(<2 x double> %a, <2 x double> %b) { 666; GCN-LABEL: v_fdiv_v2f64_afn: 667; GCN: ; %bb.0: 668; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 669; GCN-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 670; GCN-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 671; GCN-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 672; GCN-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 673; GCN-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9] 674; GCN-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11] 675; GCN-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 676; GCN-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 677; GCN-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9] 678; GCN-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11] 679; GCN-NEXT: v_mul_f64 v[12:13], v[0:1], v[8:9] 680; GCN-NEXT: v_mul_f64 v[14:15], v[2:3], v[10:11] 681; GCN-NEXT: v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1] 682; GCN-NEXT: v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3] 683; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13] 684; GCN-NEXT: v_fma_f64 
v[2:3], v[2:3], v[10:11], v[14:15] 685; GCN-NEXT: s_setpc_b64 s[30:31] 686; 687; GFX10-LABEL: v_fdiv_v2f64_afn: 688; GFX10: ; %bb.0: 689; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 690; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 691; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 692; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 693; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 694; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 695; GFX10-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9] 696; GFX10-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11] 697; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 698; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 699; GFX10-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9] 700; GFX10-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11] 701; GFX10-NEXT: v_mul_f64 v[12:13], v[0:1], v[8:9] 702; GFX10-NEXT: v_mul_f64 v[14:15], v[2:3], v[10:11] 703; GFX10-NEXT: v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1] 704; GFX10-NEXT: v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3] 705; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13] 706; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15] 707; GFX10-NEXT: s_setpc_b64 s[30:31] 708 %fdiv = fdiv afn <2 x double> %a, %b 709 ret <2 x double> %fdiv 710} 711 712define <2 x double> @v_fdiv_v2f64_ulp25(<2 x double> %a, <2 x double> %b) { 713; GFX6-LABEL: v_fdiv_v2f64_ulp25: 714; GFX6: ; %bb.0: 715; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 716; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1] 717; GFX6-NEXT: v_div_scale_f64 v[14:15], s[4:5], v[6:7], v[6:7], v[2:3] 718; GFX6-NEXT: v_rcp_f64_e32 v[10:11], v[8:9] 719; GFX6-NEXT: v_div_scale_f64 v[18:19], s[4:5], v[0:1], v[4:5], v[0:1] 720; GFX6-NEXT: v_rcp_f64_e32 v[16:17], v[14:15] 721; GFX6-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 722; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v19 723; GFX6-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] 724; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, 
v9 725; GFX6-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0 726; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 727; GFX6-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11] 728; GFX6-NEXT: v_fma_f64 v[12:13], -v[14:15], v[16:17], 1.0 729; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v15 730; GFX6-NEXT: v_fma_f64 v[12:13], v[16:17], v[12:13], v[16:17] 731; GFX6-NEXT: v_mul_f64 v[16:17], v[18:19], v[10:11] 732; GFX6-NEXT: v_fma_f64 v[18:19], -v[8:9], v[16:17], v[18:19] 733; GFX6-NEXT: v_fma_f64 v[8:9], -v[14:15], v[12:13], 1.0 734; GFX6-NEXT: v_div_fmas_f64 v[10:11], v[18:19], v[10:11], v[16:17] 735; GFX6-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13] 736; GFX6-NEXT: v_div_scale_f64 v[12:13], s[6:7], v[2:3], v[6:7], v[2:3] 737; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[10:11], v[4:5], v[0:1] 738; GFX6-NEXT: v_mul_f64 v[16:17], v[12:13], v[8:9] 739; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v3, v13 740; GFX6-NEXT: v_fma_f64 v[18:19], -v[14:15], v[16:17], v[12:13] 741; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 742; GFX6-NEXT: s_nop 1 743; GFX6-NEXT: v_div_fmas_f64 v[8:9], v[18:19], v[8:9], v[16:17] 744; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[8:9], v[6:7], v[2:3] 745; GFX6-NEXT: s_setpc_b64 s[30:31] 746; 747; GFX8-LABEL: v_fdiv_v2f64_ulp25: 748; GFX8: ; %bb.0: 749; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 750; GFX8-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1] 751; GFX8-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3] 752; GFX8-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] 753; GFX8-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] 754; GFX8-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 755; GFX8-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0 756; GFX8-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 757; GFX8-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15] 758; GFX8-NEXT: v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1] 759; GFX8-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 760; GFX8-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 761; 
GFX8-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 762; GFX8-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] 763; GFX8-NEXT: v_mul_f64 v[16:17], v[18:19], v[12:13] 764; GFX8-NEXT: v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19] 765; GFX8-NEXT: v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3] 766; GFX8-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17] 767; GFX8-NEXT: s_mov_b64 vcc, s[4:5] 768; GFX8-NEXT: v_mul_f64 v[20:21], v[18:19], v[14:15] 769; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1] 770; GFX8-NEXT: v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19] 771; GFX8-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21] 772; GFX8-NEXT: v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3] 773; GFX8-NEXT: s_setpc_b64 s[30:31] 774; 775; GFX9-LABEL: v_fdiv_v2f64_ulp25: 776; GFX9: ; %bb.0: 777; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 778; GFX9-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1] 779; GFX9-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3] 780; GFX9-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] 781; GFX9-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] 782; GFX9-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 783; GFX9-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0 784; GFX9-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 785; GFX9-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15] 786; GFX9-NEXT: v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1] 787; GFX9-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 788; GFX9-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 789; GFX9-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0 790; GFX9-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15] 791; GFX9-NEXT: v_mul_f64 v[16:17], v[18:19], v[12:13] 792; GFX9-NEXT: v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19] 793; GFX9-NEXT: v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3] 794; GFX9-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17] 795; GFX9-NEXT: s_mov_b64 vcc, s[4:5] 796; 
GFX9-NEXT: v_mul_f64 v[20:21], v[18:19], v[14:15] 797; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1] 798; GFX9-NEXT: v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19] 799; GFX9-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21] 800; GFX9-NEXT: v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3] 801; GFX9-NEXT: s_setpc_b64 s[30:31] 802; 803; GFX10-LABEL: v_fdiv_v2f64_ulp25: 804; GFX10: ; %bb.0: 805; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 806; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 807; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1] 808; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3] 809; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1] 810; GFX10-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] 811; GFX10-NEXT: v_rcp_f64_e32 v[14:15], v[10:11] 812; GFX10-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 813; GFX10-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0 814; GFX10-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 815; GFX10-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15] 816; GFX10-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 817; GFX10-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0 818; GFX10-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 819; GFX10-NEXT: v_div_scale_f64 v[16:17], s4, v[2:3], v[6:7], v[2:3] 820; GFX10-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15] 821; GFX10-NEXT: v_mul_f64 v[18:19], v[20:21], v[12:13] 822; GFX10-NEXT: v_mul_f64 v[22:23], v[16:17], v[14:15] 823; GFX10-NEXT: v_fma_f64 v[8:9], -v[8:9], v[18:19], v[20:21] 824; GFX10-NEXT: v_fma_f64 v[10:11], -v[10:11], v[22:23], v[16:17] 825; GFX10-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[18:19] 826; GFX10-NEXT: s_mov_b32 vcc_lo, s4 827; GFX10-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[22:23] 828; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1] 829; GFX10-NEXT: v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3] 830; GFX10-NEXT: s_setpc_b64 s[30:31] 831 %fdiv 
= fdiv <2 x double> %a, %b, !fpmath !0 832 ret <2 x double> %fdiv 833} 834 835define <2 x double> @v_rcp_v2f64(<2 x double> %x) { 836; GFX6-LABEL: v_rcp_v2f64: 837; GFX6: ; %bb.0: 838; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 839; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 840; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 841; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 842; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 843; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 844; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 845; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 846; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 847; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 848; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 849; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] 850; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] 851; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] 852; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 853; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] 854; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 855; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] 856; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 857; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 858; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 859; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 860; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] 861; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 862; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] 863; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 864; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] 865; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 866; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] 867; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 868; GFX6-NEXT: s_setpc_b64 s[30:31] 869; 870; GFX8-LABEL: v_rcp_v2f64: 871; GFX8: ; %bb.0: 872; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 873; GFX8-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 874; GFX8-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0 875; GFX8-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 876; GFX8-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 877; GFX8-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 878; GFX8-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 879; GFX8-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 880; GFX8-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 881; GFX8-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0 882; GFX8-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 883; GFX8-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0 884; GFX8-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0 885; GFX8-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9] 886; GFX8-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11] 887; GFX8-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9] 888; GFX8-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11] 889; GFX8-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13] 890; GFX8-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17] 891; GFX8-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15] 892; GFX8-NEXT: s_mov_b64 vcc, s[4:5] 893; GFX8-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19] 894; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 895; GFX8-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0 896; GFX8-NEXT: s_setpc_b64 s[30:31] 897; 898; GFX9-LABEL: v_rcp_v2f64: 899; GFX9: ; %bb.0: 900; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 901; GFX9-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 902; GFX9-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0 903; GFX9-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 904; GFX9-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 905; GFX9-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 906; GFX9-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 907; GFX9-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 908; GFX9-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], 
v[8:9] 909; GFX9-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0 910; GFX9-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 911; GFX9-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0 912; GFX9-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0 913; GFX9-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9] 914; GFX9-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11] 915; GFX9-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9] 916; GFX9-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11] 917; GFX9-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13] 918; GFX9-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17] 919; GFX9-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15] 920; GFX9-NEXT: s_mov_b64 vcc, s[4:5] 921; GFX9-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19] 922; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 923; GFX9-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0 924; GFX9-NEXT: s_setpc_b64 s[30:31] 925; 926; GFX10-LABEL: v_rcp_v2f64: 927; GFX10: ; %bb.0: 928; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 929; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 930; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 931; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 932; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 933; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 934; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 935; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 936; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 937; GFX10-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 938; GFX10-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 939; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 940; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 941; GFX10-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 942; GFX10-NEXT: v_div_scale_f64 v[12:13], s4, 1.0, v[2:3], 1.0 943; GFX10-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 944; GFX10-NEXT: v_mul_f64 v[14:15], v[16:17], v[8:9] 945; 
GFX10-NEXT: v_mul_f64 v[18:19], v[12:13], v[10:11] 946; GFX10-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[16:17] 947; GFX10-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[12:13] 948; GFX10-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15] 949; GFX10-NEXT: s_mov_b32 vcc_lo, s4 950; GFX10-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19] 951; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 952; GFX10-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0 953; GFX10-NEXT: s_setpc_b64 s[30:31] 954 %fdiv = fdiv <2 x double> <double 1.0, double 1.0>, %x 955 ret <2 x double> %fdiv 956} 957 958define <2 x double> @v_rcp_v2f64_arcp(<2 x double> %x) { 959; GFX6-LABEL: v_rcp_v2f64_arcp: 960; GFX6: ; %bb.0: 961; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 962; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 963; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 964; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 965; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 966; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v11 967; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 968; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 969; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 970; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 971; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 972; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] 973; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] 974; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] 975; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 976; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] 977; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 978; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] 979; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 980; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 981; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 982; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 983; GFX6-NEXT: v_fma_f64 v[4:5], 
v[12:13], v[4:5], v[12:13] 984; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 985; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] 986; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 987; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] 988; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 989; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] 990; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 991; GFX6-NEXT: s_setpc_b64 s[30:31] 992; 993; GFX8-LABEL: v_rcp_v2f64_arcp: 994; GFX8: ; %bb.0: 995; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 996; GFX8-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 997; GFX8-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0 998; GFX8-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 999; GFX8-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 1000; GFX8-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 1001; GFX8-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 1002; GFX8-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 1003; GFX8-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 1004; GFX8-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0 1005; GFX8-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 1006; GFX8-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0 1007; GFX8-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0 1008; GFX8-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9] 1009; GFX8-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11] 1010; GFX8-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9] 1011; GFX8-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11] 1012; GFX8-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13] 1013; GFX8-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17] 1014; GFX8-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15] 1015; GFX8-NEXT: s_mov_b64 vcc, s[4:5] 1016; GFX8-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19] 1017; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 1018; GFX8-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0 1019; GFX8-NEXT: s_setpc_b64 s[30:31] 1020; 
1021; GFX9-LABEL: v_rcp_v2f64_arcp: 1022; GFX9: ; %bb.0: 1023; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1024; GFX9-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 1025; GFX9-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0 1026; GFX9-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 1027; GFX9-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 1028; GFX9-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 1029; GFX9-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 1030; GFX9-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 1031; GFX9-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 1032; GFX9-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0 1033; GFX9-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 1034; GFX9-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0 1035; GFX9-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0 1036; GFX9-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9] 1037; GFX9-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11] 1038; GFX9-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9] 1039; GFX9-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11] 1040; GFX9-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13] 1041; GFX9-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17] 1042; GFX9-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15] 1043; GFX9-NEXT: s_mov_b64 vcc, s[4:5] 1044; GFX9-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19] 1045; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 1046; GFX9-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0 1047; GFX9-NEXT: s_setpc_b64 s[30:31] 1048; 1049; GFX10-LABEL: v_rcp_v2f64_arcp: 1050; GFX10: ; %bb.0: 1051; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1052; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1053; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 1054; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 1055; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 1056; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 1057; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 
1058; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 1059; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 1060; GFX10-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 1061; GFX10-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 1062; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 1063; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 1064; GFX10-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 1065; GFX10-NEXT: v_div_scale_f64 v[12:13], s4, 1.0, v[2:3], 1.0 1066; GFX10-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 1067; GFX10-NEXT: v_mul_f64 v[14:15], v[16:17], v[8:9] 1068; GFX10-NEXT: v_mul_f64 v[18:19], v[12:13], v[10:11] 1069; GFX10-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[16:17] 1070; GFX10-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[12:13] 1071; GFX10-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15] 1072; GFX10-NEXT: s_mov_b32 vcc_lo, s4 1073; GFX10-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19] 1074; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 1075; GFX10-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0 1076; GFX10-NEXT: s_setpc_b64 s[30:31] 1077 %fdiv = fdiv arcp <2 x double> <double 1.0, double 1.0>, %x 1078 ret <2 x double> %fdiv 1079} 1080 1081define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) { 1082; GCN-LABEL: v_rcp_v2f64_arcp_afn: 1083; GCN: ; %bb.0: 1084; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1085; GCN-NEXT: v_rcp_f64_e32 v[4:5], v[0:1] 1086; GCN-NEXT: v_rcp_f64_e32 v[6:7], v[2:3] 1087; GCN-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0 1088; GCN-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0 1089; GCN-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5] 1090; GCN-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7] 1091; GCN-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0 1092; GCN-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0 1093; GCN-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5] 1094; GCN-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7] 1095; GCN-NEXT: v_mul_f64 
v[8:9], 1.0, v[4:5] 1096; GCN-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7] 1097; GCN-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0 1098; GCN-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0 1099; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9] 1100; GCN-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11] 1101; GCN-NEXT: s_setpc_b64 s[30:31] 1102; 1103; GFX10-LABEL: v_rcp_v2f64_arcp_afn: 1104; GFX10: ; %bb.0: 1105; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1106; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1107; GFX10-NEXT: v_rcp_f64_e32 v[4:5], v[0:1] 1108; GFX10-NEXT: v_rcp_f64_e32 v[6:7], v[2:3] 1109; GFX10-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0 1110; GFX10-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0 1111; GFX10-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5] 1112; GFX10-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7] 1113; GFX10-NEXT: v_fma_f64 v[8:9], -v[0:1], v[4:5], 1.0 1114; GFX10-NEXT: v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0 1115; GFX10-NEXT: v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5] 1116; GFX10-NEXT: v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7] 1117; GFX10-NEXT: v_mul_f64 v[8:9], 1.0, v[4:5] 1118; GFX10-NEXT: v_mul_f64 v[10:11], 1.0, v[6:7] 1119; GFX10-NEXT: v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0 1120; GFX10-NEXT: v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0 1121; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9] 1122; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11] 1123; GFX10-NEXT: s_setpc_b64 s[30:31] 1124 %fdiv = fdiv arcp afn <2 x double> <double 1.0, double 1.0>, %x 1125 ret <2 x double> %fdiv 1126} 1127 1128define <2 x double> @v_rcp_v2f64_ulp25(<2 x double> %x) { 1129; GFX6-LABEL: v_rcp_v2f64_ulp25: 1130; GFX6: ; %bb.0: 1131; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1132; GFX6-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 1133; GFX6-NEXT: v_div_scale_f64 v[10:11], s[4:5], 1.0, v[0:1], 1.0 1134; GFX6-NEXT: v_rcp_f64_e32 v[6:7], v[4:5] 1135; GFX6-NEXT: v_mov_b32_e32 v18, 0x3ff00000 1136; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 
v18, v11 1137; GFX6-NEXT: v_fma_f64 v[8:9], -v[4:5], v[6:7], 1.0 1138; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[8:9], v[6:7] 1139; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[2:3], v[2:3], 1.0 1140; GFX6-NEXT: v_fma_f64 v[12:13], -v[4:5], v[6:7], 1.0 1141; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v1, v5 1142; GFX6-NEXT: v_fma_f64 v[6:7], v[6:7], v[12:13], v[6:7] 1143; GFX6-NEXT: v_rcp_f64_e32 v[12:13], v[8:9] 1144; GFX6-NEXT: v_mul_f64 v[14:15], v[10:11], v[6:7] 1145; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 1146; GFX6-NEXT: v_fma_f64 v[10:11], -v[4:5], v[14:15], v[10:11] 1147; GFX6-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0 1148; GFX6-NEXT: v_div_fmas_f64 v[6:7], v[10:11], v[6:7], v[14:15] 1149; GFX6-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13] 1150; GFX6-NEXT: v_div_scale_f64 v[16:17], s[6:7], 1.0, v[2:3], 1.0 1151; GFX6-NEXT: v_fma_f64 v[4:5], -v[8:9], v[12:13], 1.0 1152; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v3, v9 1153; GFX6-NEXT: v_fma_f64 v[4:5], v[12:13], v[4:5], v[12:13] 1154; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v18, v17 1155; GFX6-NEXT: v_mul_f64 v[12:13], v[16:17], v[4:5] 1156; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5] 1157; GFX6-NEXT: v_fma_f64 v[10:11], -v[8:9], v[12:13], v[16:17] 1158; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[6:7], v[0:1], 1.0 1159; GFX6-NEXT: v_div_fmas_f64 v[4:5], v[10:11], v[4:5], v[12:13] 1160; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[4:5], v[2:3], 1.0 1161; GFX6-NEXT: s_setpc_b64 s[30:31] 1162; 1163; GFX8-LABEL: v_rcp_v2f64_ulp25: 1164; GFX8: ; %bb.0: 1165; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1166; GFX8-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 1167; GFX8-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0 1168; GFX8-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 1169; GFX8-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 1170; GFX8-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 1171; GFX8-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 1172; GFX8-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 1173; GFX8-NEXT: 
v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 1174; GFX8-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0 1175; GFX8-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 1176; GFX8-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0 1177; GFX8-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0 1178; GFX8-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9] 1179; GFX8-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11] 1180; GFX8-NEXT: v_mul_f64 v[14:15], v[12:13], v[8:9] 1181; GFX8-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11] 1182; GFX8-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13] 1183; GFX8-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17] 1184; GFX8-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15] 1185; GFX8-NEXT: s_mov_b64 vcc, s[4:5] 1186; GFX8-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19] 1187; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 1188; GFX8-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0 1189; GFX8-NEXT: s_setpc_b64 s[30:31] 1190; 1191; GFX9-LABEL: v_rcp_v2f64_ulp25: 1192; GFX9: ; %bb.0: 1193; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1194; GFX9-NEXT: v_div_scale_f64 v[4:5], s[4:5], v[0:1], v[0:1], 1.0 1195; GFX9-NEXT: v_div_scale_f64 v[6:7], s[4:5], v[2:3], v[2:3], 1.0 1196; GFX9-NEXT: v_div_scale_f64 v[16:17], s[4:5], 1.0, v[2:3], 1.0 1197; GFX9-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 1198; GFX9-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 1199; GFX9-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 1200; GFX9-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 1201; GFX9-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 1202; GFX9-NEXT: v_div_scale_f64 v[12:13], vcc, 1.0, v[0:1], 1.0 1203; GFX9-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 1204; GFX9-NEXT: v_fma_f64 v[14:15], -v[4:5], v[8:9], 1.0 1205; GFX9-NEXT: v_fma_f64 v[18:19], -v[6:7], v[10:11], 1.0 1206; GFX9-NEXT: v_fma_f64 v[8:9], v[8:9], v[14:15], v[8:9] 1207; GFX9-NEXT: v_fma_f64 v[10:11], v[10:11], v[18:19], v[10:11] 1208; GFX9-NEXT: v_mul_f64 v[14:15], 
v[12:13], v[8:9] 1209; GFX9-NEXT: v_mul_f64 v[18:19], v[16:17], v[10:11] 1210; GFX9-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[12:13] 1211; GFX9-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[16:17] 1212; GFX9-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15] 1213; GFX9-NEXT: s_mov_b64 vcc, s[4:5] 1214; GFX9-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19] 1215; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0 1216; GFX9-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0 1217; GFX9-NEXT: s_setpc_b64 s[30:31] 1218; 1219; GFX10-LABEL: v_rcp_v2f64_ulp25: 1220; GFX10: ; %bb.0: 1221; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1222; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 1223; GFX10-NEXT: v_div_scale_f64 v[4:5], s4, v[0:1], v[0:1], 1.0 1224; GFX10-NEXT: v_div_scale_f64 v[6:7], s4, v[2:3], v[2:3], 1.0 1225; GFX10-NEXT: v_div_scale_f64 v[16:17], vcc_lo, 1.0, v[0:1], 1.0 1226; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5] 1227; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7] 1228; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 1229; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 1230; GFX10-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 1231; GFX10-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 1232; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0 1233; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0 1234; GFX10-NEXT: v_fma_f64 v[8:9], v[8:9], v[12:13], v[8:9] 1235; GFX10-NEXT: v_div_scale_f64 v[12:13], s4, 1.0, v[2:3], 1.0 1236; GFX10-NEXT: v_fma_f64 v[10:11], v[10:11], v[14:15], v[10:11] 1237; GFX10-NEXT: v_mul_f64 v[14:15], v[16:17], v[8:9] 1238; GFX10-NEXT: v_mul_f64 v[18:19], v[12:13], v[10:11] 1239; GFX10-NEXT: v_fma_f64 v[4:5], -v[4:5], v[14:15], v[16:17] 1240; GFX10-NEXT: v_fma_f64 v[6:7], -v[6:7], v[18:19], v[12:13] 1241; GFX10-NEXT: v_div_fmas_f64 v[4:5], v[4:5], v[8:9], v[14:15] 1242; GFX10-NEXT: s_mov_b32 vcc_lo, s4 1243; GFX10-NEXT: v_div_fmas_f64 v[6:7], v[6:7], v[10:11], v[18:19] 1244; GFX10-NEXT: 
v_div_fixup_f64 v[0:1], v[4:5], v[0:1], 1.0
; GFX10-NEXT: v_div_fixup_f64 v[2:3], v[6:7], v[2:3], 1.0
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %fdiv = fdiv <2 x double> <double 1.0, double 1.0>, %x, !fpmath !0
  ret <2 x double> %fdiv
}

; <2 x double> fdiv carrying the `afn` fast-math flag and !fpmath !0.
; The assertions expect, for each of the two elements, a v_rcp_f64, two rounds
; of FMA refinement, a v_mul_f64 and a final FMA correction; no v_div_scale,
; v_div_fmas or v_div_fixup instruction appears. The GFX10 assertions differ
; from the shared GCN ones only by the additional s_waitcnt_vscnt.
define <2 x double> @v_fdiv_v2f64_afn_ulp25(<2 x double> %a, <2 x double> %b) {
; GCN-LABEL: v_fdiv_v2f64_afn_ulp25:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GCN-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GCN-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GCN-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
; GCN-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
; GCN-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
; GCN-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GCN-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
; GCN-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
; GCN-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
; GCN-NEXT: v_mul_f64 v[12:13], v[0:1], v[8:9]
; GCN-NEXT: v_mul_f64 v[14:15], v[2:3], v[10:11]
; GCN-NEXT: v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
; GCN-NEXT: v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
; GCN-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_v2f64_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
; GFX10-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
; GFX10-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
; GFX10-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
; GFX10-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
; GFX10-NEXT: v_mul_f64 v[12:13], v[0:1], v[8:9]
; GFX10-NEXT: v_mul_f64 v[14:15], v[2:3], v[10:11]
; GFX10-NEXT: v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
; GFX10-NEXT: v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %fdiv = fdiv afn <2 x double> %a, %b, !fpmath !0
  ret <2 x double> %fdiv
}

; <2 x double> fdiv carrying only the `arcp` fast-math flag and !fpmath !0.
; In contrast to the afn tests, the assertions for every target still expect
; the v_div_scale_f64 / v_rcp_f64 / v_div_fmas_f64 / v_div_fixup_f64 sequence.
; In the GFX6 assertions the vcc input of v_div_fmas_f64 is built with
; v_cmp_eq_u32 and s_xor_b64, whereas the GFX8, GFX9 and GFX10 assertions take
; it from the condition output of v_div_scale_f64.
define <2 x double> @v_fdiv_v2f64_arcp_ulp25(<2 x double> %a, <2 x double> %b) {
; GFX6-LABEL: v_fdiv_v2f64_arcp_ulp25:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
; GFX6-NEXT: v_div_scale_f64 v[14:15], s[4:5], v[6:7], v[6:7], v[2:3]
; GFX6-NEXT: v_rcp_f64_e32 v[10:11], v[8:9]
; GFX6-NEXT: v_div_scale_f64 v[18:19], s[4:5], v[0:1], v[4:5], v[0:1]
; GFX6-NEXT: v_rcp_f64_e32 v[16:17], v[14:15]
; GFX6-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v1, v19
; GFX6-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11]
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v5, v9
; GFX6-NEXT: v_fma_f64 v[12:13], -v[8:9], v[10:11], 1.0
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: v_fma_f64 v[10:11], v[10:11], v[12:13], v[10:11]
; GFX6-NEXT: v_fma_f64 v[12:13], -v[14:15], v[16:17], 1.0
; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], v7, v15
; GFX6-NEXT: v_fma_f64 v[12:13], v[16:17], v[12:13], v[16:17]
; GFX6-NEXT: v_mul_f64 v[16:17], v[18:19], v[10:11]
; GFX6-NEXT: v_fma_f64 v[18:19], -v[8:9], v[16:17], v[18:19]
; GFX6-NEXT: v_fma_f64 v[8:9], -v[14:15], v[12:13], 1.0
; GFX6-NEXT: v_div_fmas_f64 v[10:11], v[18:19], v[10:11], v[16:17]
; GFX6-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[12:13]
; GFX6-NEXT: v_div_scale_f64 v[12:13], s[6:7], v[2:3], v[6:7], v[2:3]
; GFX6-NEXT: v_div_fixup_f64 v[0:1], v[10:11], v[4:5], v[0:1]
; GFX6-NEXT: v_mul_f64 v[16:17], v[12:13], v[8:9]
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, v3, v13
; GFX6-NEXT: v_fma_f64 v[18:19], -v[14:15], v[16:17], v[12:13]
; GFX6-NEXT: s_xor_b64 vcc, vcc, s[4:5]
; GFX6-NEXT: s_nop 1
; GFX6-NEXT: v_div_fmas_f64 v[8:9], v[18:19], v[8:9], v[16:17]
; GFX6-NEXT: v_div_fixup_f64 v[2:3], v[8:9], v[6:7], v[2:3]
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_fdiv_v2f64_arcp_ulp25:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
; GFX8-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3]
; GFX8-NEXT: v_rcp_f64_e32 v[12:13], v[8:9]
; GFX8-NEXT: v_rcp_f64_e32 v[14:15], v[10:11]
; GFX8-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
; GFX8-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
; GFX8-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
; GFX8-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
; GFX8-NEXT: v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1]
; GFX8-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
; GFX8-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
; GFX8-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0
; GFX8-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
; GFX8-NEXT: v_mul_f64 v[16:17], v[18:19], v[12:13]
; GFX8-NEXT: v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19]
; GFX8-NEXT: v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3]
; GFX8-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17]
; GFX8-NEXT: s_mov_b64 vcc, s[4:5]
; GFX8-NEXT: v_mul_f64 v[20:21], v[18:19], v[14:15]
; GFX8-NEXT: v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
; GFX8-NEXT: v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19]
; GFX8-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21]
; GFX8-NEXT: v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_fdiv_v2f64_arcp_ulp25:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_div_scale_f64 v[8:9], s[4:5], v[4:5], v[4:5], v[0:1]
; GFX9-NEXT: v_div_scale_f64 v[10:11], s[4:5], v[6:7], v[6:7], v[2:3]
; GFX9-NEXT: v_rcp_f64_e32 v[12:13], v[8:9]
; GFX9-NEXT: v_rcp_f64_e32 v[14:15], v[10:11]
; GFX9-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
; GFX9-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
; GFX9-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
; GFX9-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
; GFX9-NEXT: v_div_scale_f64 v[18:19], vcc, v[0:1], v[4:5], v[0:1]
; GFX9-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
; GFX9-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
; GFX9-NEXT: v_fma_f64 v[16:17], -v[10:11], v[14:15], 1.0
; GFX9-NEXT: v_fma_f64 v[14:15], v[14:15], v[16:17], v[14:15]
; GFX9-NEXT: v_mul_f64 v[16:17], v[18:19], v[12:13]
; GFX9-NEXT: v_fma_f64 v[8:9], -v[8:9], v[16:17], v[18:19]
; GFX9-NEXT: v_div_scale_f64 v[18:19], s[4:5], v[2:3], v[6:7], v[2:3]
; GFX9-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[16:17]
; GFX9-NEXT: s_mov_b64 vcc, s[4:5]
; GFX9-NEXT: v_mul_f64 v[20:21], v[18:19], v[14:15]
; GFX9-NEXT: v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
; GFX9-NEXT: v_fma_f64 v[10:11], -v[10:11], v[20:21], v[18:19]
; GFX9-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[20:21]
; GFX9-NEXT: v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_v2f64_arcp_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_div_scale_f64 v[8:9], s4, v[4:5], v[4:5], v[0:1]
; GFX10-NEXT: v_div_scale_f64 v[10:11], s4, v[6:7], v[6:7], v[2:3]
; GFX10-NEXT: v_div_scale_f64 v[20:21], vcc_lo, v[0:1], v[4:5], v[0:1]
; GFX10-NEXT: v_rcp_f64_e32 v[12:13], v[8:9]
; GFX10-NEXT: v_rcp_f64_e32 v[14:15], v[10:11]
; GFX10-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
; GFX10-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
; GFX10-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
; GFX10-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
; GFX10-NEXT: v_fma_f64 v[16:17], -v[8:9], v[12:13], 1.0
; GFX10-NEXT: v_fma_f64 v[18:19], -v[10:11], v[14:15], 1.0
; GFX10-NEXT: v_fma_f64 v[12:13], v[12:13], v[16:17], v[12:13]
; GFX10-NEXT: v_div_scale_f64 v[16:17], s4, v[2:3], v[6:7], v[2:3]
; GFX10-NEXT: v_fma_f64 v[14:15], v[14:15], v[18:19], v[14:15]
; GFX10-NEXT: v_mul_f64 v[18:19], v[20:21], v[12:13]
; GFX10-NEXT: v_mul_f64 v[22:23], v[16:17], v[14:15]
; GFX10-NEXT: v_fma_f64 v[8:9], -v[8:9], v[18:19], v[20:21]
; GFX10-NEXT: v_fma_f64 v[10:11], -v[10:11], v[22:23], v[16:17]
; GFX10-NEXT: v_div_fmas_f64 v[8:9], v[8:9], v[12:13], v[18:19]
; GFX10-NEXT: s_mov_b32 vcc_lo, s4
; GFX10-NEXT: v_div_fmas_f64 v[10:11], v[10:11], v[14:15], v[22:23]
; GFX10-NEXT: v_div_fixup_f64 v[0:1], v[8:9], v[4:5], v[0:1]
; GFX10-NEXT: v_div_fixup_f64 v[2:3], v[10:11], v[6:7], v[2:3]
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %fdiv = fdiv arcp <2 x double> %a, %b, !fpmath !0
  ret <2 x double> %fdiv
}

; <2 x double> fdiv carrying both `afn` and `arcp` plus !fpmath !0.
; The expected instruction sequences are the same as the ones asserted for
; v_fdiv_v2f64_afn_ulp25 (v_rcp_f64 with FMA refinement, no v_div_scale,
; v_div_fmas or v_div_fixup).
define <2 x double> @v_fdiv_v2f64_arcp_afn_ulp25(<2 x double> %a, <2 x double> %b) {
; GCN-LABEL: v_fdiv_v2f64_arcp_afn_ulp25:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GCN-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GCN-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GCN-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
; GCN-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
; GCN-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
; GCN-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GCN-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
; GCN-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
; GCN-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
; GCN-NEXT: v_mul_f64 v[12:13], v[0:1], v[8:9]
; GCN-NEXT: v_mul_f64 v[14:15], v[2:3], v[10:11]
; GCN-NEXT: v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
; GCN-NEXT: v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
; GCN-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
; GCN-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_fdiv_v2f64_arcp_afn_ulp25:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: v_rcp_f64_e32 v[8:9], v[4:5]
; GFX10-NEXT: v_rcp_f64_e32 v[10:11], v[6:7]
; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
; GFX10-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
; GFX10-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
; GFX10-NEXT: v_fma_f64 v[12:13], -v[4:5], v[8:9], 1.0
; GFX10-NEXT: v_fma_f64 v[14:15], -v[6:7], v[10:11], 1.0
; GFX10-NEXT: v_fma_f64 v[8:9], v[12:13], v[8:9], v[8:9]
; GFX10-NEXT: v_fma_f64 v[10:11], v[14:15], v[10:11], v[10:11]
; GFX10-NEXT: v_mul_f64 v[12:13], v[0:1], v[8:9]
; GFX10-NEXT: v_mul_f64 v[14:15], v[2:3], v[10:11]
; GFX10-NEXT: v_fma_f64 v[0:1], -v[4:5], v[12:13], v[0:1]
; GFX10-NEXT: v_fma_f64 v[2:3], -v[6:7], v[14:15], v[2:3]
; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[12:13]
; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[14:15]
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %fdiv = fdiv afn arcp <2 x double> %a, %b, !fpmath !0
  ret <2 x double> %fdiv
}

; Operand of the !fpmath attachments on the fdiv instructions above: a maximum
; acceptable error of 2.5 ULPs.
!0 = !{float 2.500000e+00}