; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

; Checks the code generated for the llvm.pow intrinsic on four AMDGPU
; subtargets (tahiti, fiji, gfx900, gfx1010). In every case below the expected
; sequence is v_exp_f32(v_mul_legacy_f32(y, v_log_f32(x))), with f16 operands
; converted to f32 first. The assertions are autogenerated: regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.

; Scalar f32 pow with both operands in VGPRs.
define float @v_pow_f32(float %x, float %y) {
; GFX6-LABEL: v_pow_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; <2 x float> pow: the scalar sequence is emitted once per element.
define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX6-LABEL: v_pow_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_log_f32_e32 v1, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_log_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_log_f32_e32 v1, v1
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %pow
}

; Scalar f16 pow: operands are converted to f32 before the log/mul/exp
; sequence.
define half @v_pow_f16(half %x, half %y) {
; GFX6-LABEL: v_pow_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call half @llvm.pow.f16(half %x, half %y)
  ret half %pow
}

; <2 x half> pow: each half is widened to f32, computed, and (on the targets
; with packed f16 results) repacked into a single register.
define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> pow with a negated base. On tahiti the negation is an explicit
; xor with 0x80008000; on the other subtargets it is folded into the
; f16-to-f32 conversions as a source modifier (-v0).
define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v4, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> pow with a negated exponent (same folding pattern as above, but
; applied to the second operand).
define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; <2 x half> pow with both operands negated.
define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    s_mov_b32 s4, 0x80008000
; GFX6-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; FIXME: The f64 case below is disabled. NOTE(review): presumably f64 pow is
; not handled by this lowering yet -- confirm before enabling it.
; define double @v_pow_f64(double %x, double %y) {
;   %pow = call double @llvm.pow.f64(double %x, double %y)
;   ret double %pow
; }

; f32 pow with fabs applied to the base; the fabs shows up as an AND with
; 0x7fffffff ahead of the log.
define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
  ret float %pow
}

; f32 pow with fabs applied to the exponent.
define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_and_b32_e32 v1, 0x7fffffff, v1
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
  ret float %pow
}

; f32 pow with fabs applied to both operands; the mask is materialized once
; in s4 (s_brev_b32 s4, -2) and reused for both ANDs.
define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_brev_b32 s4, -2
; GFX6-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_brev_b32 s4, -2
; GFX8-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_brev_b32 s4, -2
; GFX9-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    s_brev_b32 s4, -2
; GFX10-NEXT:    v_and_b32_e32 v0, s4, v0
; GFX10-NEXT:    v_and_b32_e32 v1, s4, v1
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
  ret float %pow
}

; amdgpu_ps variant: base passed inreg (read from s0), exponent in a VGPR.
define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v1, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v1, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v1, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v1, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant: base in a VGPR, exponent passed inreg (read from s0).
define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant: both operands passed inreg (read from s0 and s1).
define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

declare half @llvm.pow.f16(half, half)
declare float @llvm.pow.f32(float, float)
declare double @llvm.pow.f64(double, double)

declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)

declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)