; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s

; Exercises GlobalISel selection of the llvm.amdgcn.ds.fmin intrinsic on an
; addrspace(3) pointer for three targets (tonga, gfx900, gfx1010).
;
; Naming used by the functions below:
;   ss      amdgpu_ps entry point whose arguments are marked inreg; the
;           expected code copies them from s2/s3 into v0/v1 first.
;   vv      default calling convention; the expected code uses v0/v1 directly.
;   offset  the pointer is advanced by 128 floats with a getelementptr and the
;           expected instruction carries offset:512.
;   nortn   the result of the intrinsic call is not used by the function.
;
; The tonga checks expect "s_mov_b32 m0, -1" ahead of every DS instruction;
; the gfx900 and gfx1010 checks do not.
;
; Do not edit the check lines by hand; regenerate them with
; utils/update_llc_test_checks.py.

; Inreg pointer and value, no offset, result returned from the shader.
define amdgpu_ps float @ds_fmin_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmin_f32_ss:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: ds_fmin_f32_ss:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: ds_fmin_f32_ss:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; Inreg pointer and value; the 128-element gep is expected to fold into the
; instruction as offset:512.
define amdgpu_ps float @ds_fmin_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmin_f32_ss_offset:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s3
; GFX8-NEXT: v_mov_b32_e32 v1, s2
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: ds_fmin_f32_ss_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: ds_fmin_f32_ss_offset:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, s3
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; Inreg pointer and value, call result unused. The checks still expect the
; returning form ds_min_rtn_f32, followed directly by s_endpgm.
define amdgpu_ps void @ds_fmin_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmin_f32_ss_nortn:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: ds_fmin_f32_ss_nortn:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: ds_fmin_f32_ss_nortn:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT: s_endpgm
  %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Inreg pointer and value, 128-element gep (offset:512 expected), call result
; unused. The checks still expect ds_min_rtn_f32.
define amdgpu_ps void @ds_fmin_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
; GFX8-LABEL: ds_fmin_f32_ss_offset_nortn:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_mov_b32_e32 v0, s3
; GFX8-NEXT: v_mov_b32_e32 v1, s2
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: ds_fmin_f32_ss_offset_nortn:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_mov_b32_e32 v0, s3
; GFX9-NEXT: v_mov_b32_e32 v1, s2
; GFX9-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: ds_fmin_f32_ss_offset_nortn:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v0, s3
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: ds_min_rtn_f32 v0, v1, v0 offset:512
; GFX10-NEXT: s_endpgm
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %unused = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Default calling convention: pointer in v0, value in v1, result returned in
; v0. The gfx1010 checks additionally expect "s_waitcnt_vscnt null, 0x0" at
; function entry.
define float @ds_fmin_f32_vv(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; Default calling convention with the 128-element gep; offset:512 is expected
; on the DS instruction.
define float @ds_fmin_f32_vv_offset(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv_offset:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv_offset:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv_offset:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret float %ret
}

; Default calling convention, call result (%ret) unused because the function
; returns void. The checks still expect ds_min_rtn_f32 and a trailing
; "s_waitcnt lgkmcnt(0)" before the return.
define void @ds_fmin_f32_vv_nortn(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv_nortn:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv_nortn:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv_nortn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Default calling convention, 128-element gep (offset:512 expected), call
; result unused because the function returns void.
define void @ds_fmin_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv_offset_nortn:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv_offset_nortn:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv_offset_nortn:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1 offset:512
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  ret void
}

; Same as ds_fmin_f32_vv except that the final i1 immediate is true.
; NOTE(review): going by the test name, that operand is presumably the
; intrinsic's volatile flag; confirm against the intrinsic definition.
; The expected code is identical to the i1 false variant.
define float @ds_fmin_f32_vv_volatile(float addrspace(3)* %ptr, float %val) {
; GFX8-LABEL: ds_fmin_f32_vv_volatile:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: ds_fmin_f32_vv_volatile:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: ds_fmin_f32_vv_volatile:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: ds_min_rtn_f32 v0, v0, v1
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_setpc_b64 s[30:31]
  %ret = call float @llvm.amdgcn.ds.fmin(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true)
  ret float %ret
}

; Intrinsic under test. The two i32 operands and the i1 operand are immarg,
; so every call must pass them as constants.
declare float @llvm.amdgcn.ds.fmin(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0

attributes #0 = { argmemonly nounwind willreturn }