; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel < %s -march=amdgcn -mcpu=gfx90a -verify-machineinstrs | FileCheck %s -check-prefix=GFX90A

; GlobalISel codegen test for gfx90a f64 atomic operations: buffer/raw/struct
; buffer fadd/fmin/fmax, global and flat fadd/fmin/fmax, and LDS (local) fadd.
; CHECK lines are autogenerated; regenerate with update_llc_test_checks.py
; rather than editing them by hand.

declare double @llvm.amdgcn.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i1)
declare double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
declare double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double, <4 x i32>, i32, i32, i32 immarg)
declare double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
declare double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double, <4 x i32>, i32, i32, i32 immarg)
declare double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32, i32 immarg)
declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i32, i32 immarg)
declare double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
declare double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
declare double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
declare double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data)
declare double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data)
declare double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data)
declare double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* nocapture, double, i32, i32, i1)

define amdgpu_kernel void @buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: buffer_atomic_add_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
  ret void
}

define amdgpu_ps void @buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: buffer_atomic_add_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i1 0)
  store double %ret, double* undef
  ret void
}

define amdgpu_kernel void @buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: buffer_atomic_add_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i1 1)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @raw_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_add_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 offen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 offen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}

define amdgpu_kernel void @raw_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: raw_buffer_atomic_add_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 4 offen glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @struct_buffer_atomic_add_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_add_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @struct_buffer_atomic_add_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[0:3], 0 idxen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}

define amdgpu_kernel void @struct_buffer_atomic_add_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: struct_buffer_atomic_add_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_add_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fadd.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_min_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 offen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 offen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}

define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: raw_buffer_atomic_min_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[4:7], 4 offen glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @struct_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_min_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @struct_buffer_atomic_min_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[0:3], 0 idxen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}

define amdgpu_kernel void @struct_buffer_atomic_min_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: struct_buffer_atomic_min_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_min_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmin.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_max_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 offen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @raw_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 offen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}

define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[4:7], 4 offen glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @struct_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_max_noret_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s8, s[0:1], 0x3c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s8
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  ret void
}

define amdgpu_ps void @struct_buffer_atomic_max_rtn_f64(<4 x i32> inreg %rsrc, double %data, i32 %vindex) {
; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[0:3], 0 idxen glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 0, i32 0, i32 0)
  store double %ret, double* undef
  ret void
}

define amdgpu_kernel void @struct_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inreg %rsrc, double %data, i32 %vindex, double addrspace(1)* %out) {
; GFX90A-LABEL: struct_buffer_atomic_max_rtn_f64_off4_slc:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX90A-NEXT:    s_load_dword s10, s[0:1], 0x3c
; GFX90A-NEXT:    s_load_dwordx2 s[8:9], s[0:1], 0x44
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    v_mov_b32_e32 v2, s10
; GFX90A-NEXT:    buffer_atomic_max_f64 v[0:1], v2, s[4:7], 0 idxen offset:4 glc slc
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    global_store_dwordx2 v2, v[0:1], s[8:9]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.struct.buffer.atomic.fmax.f64(double %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 0, i32 2)
  store double %ret, double addrspace(1)* %out, align 8
  ret void
}

define amdgpu_kernel void @global_atomic_fadd_f64_noret(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fadd_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret void
}

define amdgpu_kernel void @global_atomic_fmin_f64_noret(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmin_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret void
}

define amdgpu_kernel void @global_atomic_fmax_f64_noret(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmax_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret void
}

define double @global_atomic_fadd_f64_rtn(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fadd_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    global_atomic_add_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fadd.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret double %ret
}

define double @global_atomic_fmax_f64_rtn(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmax_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    global_atomic_max_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fmax.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret double %ret
}

define double @global_atomic_fmin_f64_rtn(double addrspace(1)* %ptr, double %data) {
; GFX90A-LABEL: global_atomic_fmin_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    global_atomic_min_f64 v[0:1], v[0:1], v[2:3], off glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.global.atomic.fmin.f64.p1f64.f64(double addrspace(1)* %ptr, double %data)
  ret double %ret
}

define amdgpu_kernel void @flat_atomic_fadd_f64_noret(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fadd_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data)
  ret void
}

define double @flat_atomic_fadd_f64_rtn(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fadd_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_add_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fadd.f64.p0f64.f64(double* %ptr, double %data)
  ret double %ret
}

define amdgpu_kernel void @flat_atomic_fmin_f64_noret(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmin_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data)
  ret void
}

define double @flat_atomic_fmin_f64_rtn(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmin_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_min_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fmin.f64.p0f64.f64(double* %ptr, double %data)
  ret double %ret
}

define amdgpu_kernel void @flat_atomic_fmax_f64_noret(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmax_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT:    v_pk_mov_b32 v[2:3], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data)
  ret void
}

define double @flat_atomic_fmax_f64_rtn(double* %ptr, double %data) {
; GFX90A-LABEL: flat_atomic_fmax_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    flat_atomic_max_f64 v[0:1], v[0:1], v[2:3] glc
; GFX90A-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.flat.atomic.fmax.f64.p0f64.f64(double* %ptr, double %data)
  ret double %ret
}

define amdgpu_kernel void @local_atomic_fadd_f64_noret(double addrspace(3)* %ptr, double %data) {
; GFX90A-LABEL: local_atomic_fadd_f64_noret:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dword s4, s[0:1], 0x24
; GFX90A-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x2c
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, s4
; GFX90A-NEXT:    v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v2, v[0:1]
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0)
  ret void
}

define double @local_atomic_fadd_f64_rtn(double addrspace(3)* %ptr, double %data) {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v4, v1
; GFX90A-NEXT:    v_mov_b32_e32 v5, v2
; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[4:5]
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = call double @llvm.amdgcn.ds.fadd.f64(double addrspace(3)* %ptr, double %data, i32 0, i32 0, i1 0)
  ret double %ret
}

define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat(double addrspace(3)* %ptr) {
; GFX90A-LABEL: local_atomic_fadd_f64_noret_pat:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_load_dword s0, s[0:1], 0x24
; GFX90A-NEXT:    v_mov_b32_e32 v0, 0
; GFX90A-NEXT:    v_mov_b32_e32 v1, 0x40100000
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, s0
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v2, v[0:1]
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_endpgm
main_body:
  %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
  ret void
}

define double @local_atomic_fadd_f64_rtn_pat(double addrspace(3)* %ptr, double %data) {
; GFX90A-LABEL: local_atomic_fadd_f64_rtn_pat:
; GFX90A:       ; %bb.0: ; %main_body
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_mov_b32_e32 v3, 0x40100000
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    ds_add_rtn_f64 v[0:1], v0, v[2:3]
; GFX90A-NEXT:    s_waitcnt lgkmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
main_body:
  %ret = atomicrmw fadd double addrspace(3)* %ptr, double 4.0 seq_cst
  ret double %ret
}