; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

define float @v_roundeven_f32(float %x) {
; GFX6-LABEL: v_roundeven_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call float @llvm.roundeven.f32(float %x)
  ret float %roundeven
}

define <2 x float> @v_roundeven_v2f32(<2 x float> %x) {
; GFX6-LABEL: v_roundeven_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x float> @llvm.roundeven.v2f32(<2 x float> %x)
  ret <2 x float> %roundeven
}

define <3 x float> @v_roundeven_v3f32(<3 x float> %x) {
; GFX6-LABEL: v_roundeven_v3f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v3f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v3f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v3f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v3f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    v_rndne_f32_e32 v2, v2
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <3 x float> @llvm.roundeven.v3f32(<3 x float> %x)
  ret <3 x float> %roundeven
}

define <4 x float> @v_roundeven_v4f32(<4 x float> %x) {
; GFX6-LABEL: v_roundeven_v4f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e32 v0, v0
; GFX8-NEXT:    v_rndne_f32_e32 v1, v1
; GFX8-NEXT:    v_rndne_f32_e32 v2, v2
; GFX8-NEXT:    v_rndne_f32_e32 v3, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e32 v0, v0
; GFX9-NEXT:    v_rndne_f32_e32 v1, v1
; GFX9-NEXT:    v_rndne_f32_e32 v2, v2
; GFX9-NEXT:    v_rndne_f32_e32 v3, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v4f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e32 v0, v0
; GFX10-NEXT:    v_rndne_f32_e32 v1, v1
; GFX10-NEXT:    v_rndne_f32_e32 v2, v2
; GFX10-NEXT:    v_rndne_f32_e32 v3, v3
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <4 x float> @llvm.roundeven.v4f32(<4 x float> %x)
  ret <4 x float> %roundeven
}

define half @v_roundeven_f16(half %x) {
; GFX6-LABEL: v_roundeven_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f16_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call half @llvm.roundeven.f16(half %x)
  ret half %roundeven
}

define <2 x half> @v_roundeven_v2f16(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f16_e32 v1, v0
; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x)
  ret <2 x half> %roundeven
}

define <2 x half> @v_roundeven_v2f16_fneg(<2 x half> %x) {
; GFX6-LABEL: v_roundeven_v2f16_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX6-NEXT:    v_rndne_f32_e32 v0, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f16_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX7-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v0
; GFX7-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v0
; GFX7-NEXT:    v_rndne_f32_e32 v0, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v2
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f16_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_rndne_f16_e32 v1, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v2, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f16_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX9-NEXT:    v_rndne_f16_e32 v1, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v2, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v1, v2, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f16_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX10-NEXT:    v_rndne_f16_e32 v1, v0
; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_and_or_b32 v0, 0xffff, v1, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %roundeven = call <2 x half> @llvm.roundeven.v2f16(<2 x half> %x.fneg)
  ret <2 x half> %roundeven
}

define <4 x half> @v_roundeven_v4f16(<4 x half> %x) {
; GFX6-LABEL: v_roundeven_v4f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_rndne_f32_e32 v0, v0
; GFX6-NEXT:    v_rndne_f32_e32 v1, v1
; GFX6-NEXT:    v_rndne_f32_e32 v2, v2
; GFX6-NEXT:    v_rndne_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v4f16:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX7-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX7-NEXT:    v_rndne_f32_e32 v0, v0
; GFX7-NEXT:    v_rndne_f32_e32 v1, v1
; GFX7-NEXT:    v_rndne_f32_e32 v2, v2
; GFX7-NEXT:    v_rndne_f32_e32 v3, v3
; GFX7-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v4f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f16_e32 v2, v0
; GFX8-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_rndne_f16_e32 v3, v1
; GFX8-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_mov_b32_e32 v4, 16
; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_lshlrev_b32_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
; GFX8-NEXT:    v_or_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v4f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f16_e32 v2, v0
; GFX9-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_rndne_f16_e32 v3, v1
; GFX9-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX9-NEXT:    v_and_or_b32 v0, v2, v4, v0
; GFX9-NEXT:    v_and_or_b32 v1, v3, v4, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v4f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f16_e32 v2, v0
; GFX10-NEXT:    v_rndne_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_rndne_f16_e32 v3, v1
; GFX10-NEXT:    v_mov_b32_e32 v4, 0xffff
; GFX10-NEXT:    v_rndne_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_and_or_b32 v0, v2, v4, v0
; GFX10-NEXT:    v_and_or_b32 v1, v3, v4, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <4 x half> @llvm.roundeven.v4f16(<4 x half> %x)
  ret <4 x half> %roundeven
}


define float @v_roundeven_f32_fabs(float %x) {
; GFX6-LABEL: v_roundeven_f32_fabs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fabs:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fabs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fabs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f32_fabs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e64 v0, |v0|
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %roundeven = call float @llvm.roundeven.f32(float %fabs.x)
  ret float %roundeven
}

define amdgpu_ps float @s_roundeven_f32(float inreg %x) {
; GFX6-LABEL: s_roundeven_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_rndne_f32_e32 v0, s0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX7-LABEL: s_roundeven_f32:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    v_rndne_f32_e32 v0, s0
; GFX7-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_roundeven_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_rndne_f32_e32 v0, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_roundeven_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_rndne_f32_e32 v0, s0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_roundeven_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_rndne_f32_e32 v0, s0
; GFX10-NEXT:    ; return to shader part epilog
  %roundeven = call float @llvm.roundeven.f32(float %x)
  ret float %roundeven
}

define float @v_roundeven_f32_fneg(float %x) {
; GFX6-LABEL: v_roundeven_f32_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f32_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f32_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f32_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f32_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f32_e64 v0, -v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg float %x
  %roundeven = call float @llvm.roundeven.f32(float %neg.x)
  ret float %roundeven
}

define double @v_roundeven_f64(double %x) {
; GFX6-LABEL: v_roundeven_f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v1
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT:    v_add_f64 v[4:5], v[0:1], v[2:3]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v1, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call double @llvm.roundeven.f64(double %x)
  ret double %roundeven
}

define double @v_roundeven_f64_fneg(double %x) {
; GFX6-LABEL: v_roundeven_f64_fneg:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_xor_b32_e32 v6, 0x80000000, v1
; GFX6-NEXT:    v_and_b32_e32 v3, 0x80000000, v6
; GFX6-NEXT:    v_mov_b32_e32 v2, 0
; GFX6-NEXT:    v_or_b32_e32 v3, 0x43300000, v3
; GFX6-NEXT:    v_add_f64 v[4:5], -v[0:1], v[2:3]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[2:3], v[4:5], -v[2:3]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v2, v0, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v3, v6, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_f64_fneg:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_f64_fneg:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_f64_fneg:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_f64_fneg:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f64_e64 v[0:1], -v[0:1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %neg.x = fneg double %x
  %roundeven = call double @llvm.roundeven.f64(double %neg.x)
  ret double %roundeven
}

define <2 x double> @v_roundeven_v2f64(<2 x double> %x) {
; GFX6-LABEL: v_roundeven_v2f64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    s_brev_b32 s6, 1
; GFX6-NEXT:    s_mov_b32 s7, 0x43300000
; GFX6-NEXT:    v_and_b32_e32 v5, s6, v1
; GFX6-NEXT:    v_mov_b32_e32 v4, 0
; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
; GFX6-NEXT:    v_add_f64 v[6:7], v[0:1], v[4:5]
; GFX6-NEXT:    s_mov_b32 s4, -1
; GFX6-NEXT:    s_mov_b32 s5, 0x432fffff
; GFX6-NEXT:    v_add_f64 v[5:6], v[6:7], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[0:1]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v0, v5, v0, vcc
; GFX6-NEXT:    v_and_b32_e32 v5, s6, v3
; GFX6-NEXT:    v_or_b32_e32 v5, s7, v5
; GFX6-NEXT:    v_add_f64 v[7:8], v[2:3], v[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v1, v6, v1, vcc
; GFX6-NEXT:    v_add_f64 v[4:5], v[7:8], -v[4:5]
; GFX6-NEXT:    v_cmp_gt_f64_e64 vcc, |v[2:3]|, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX7-LABEL: v_roundeven_v2f64:
; GFX7:       ; %bb.0:
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX7-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_roundeven_v2f64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX8-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_roundeven_v2f64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX9-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_roundeven_v2f64:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_rndne_f64_e32 v[0:1], v[0:1]
; GFX10-NEXT:    v_rndne_f64_e32 v[2:3], v[2:3]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %roundeven = call <2 x double> @llvm.roundeven.v2f64(<2 x double> %x)
  ret <2 x double> %roundeven
}

declare half @llvm.roundeven.f16(half) #0
declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) #0
declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) #0

declare float @llvm.roundeven.f32(float) #0
declare <2 x float> @llvm.roundeven.v2f32(<2 x float>) #0
declare <3 x float> @llvm.roundeven.v3f32(<3 x float>) #0
declare <4 x float> @llvm.roundeven.v4f32(<4 x float>) #0

declare double @llvm.roundeven.f64(double) #0
declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) #0

declare half @llvm.fabs.f16(half) #0
declare float @llvm.fabs.f32(float) #0

attributes #0 = { nounwind readnone speculatable willreturn }