; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

; Code generation test for the llvm.amdgcn.image.load.* and
; llvm.amdgcn.image.store.* intrinsics when compiling for gfx90a.
; Each function issues one intrinsic call (image_store_wait issues three) and
; the check lines pin the expected instruction, register operands, dmask and
; modifiers.

; ----------------------------------------------------------------------------
; Loads with dmask 15, one function per image dimension.
;
; The *_lwe variants pass 2 as the second-to-last intrinsic operand and use
; the {<4 x float>, i32} return form; the checks expect the "lwe" modifier and
; a five-register destination, and the extra i32 is stored through %out.
; The cube and array dimensions are expected to carry the "da" modifier.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}load_1d:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1d_lwe:
; GCN: image_load v[0:4], v{{[0-9]+}}, s[0:7] dmask:0xf unorm lwe{{$}}
define amdgpu_ps <4 x float> @load_1d_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s) {
main_body:
  %v = call {<4 x float>, i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2d:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_3d:
; GCN: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_cube:
; GCN: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_cube_lwe:
; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
define amdgpu_ps <4 x float> @load_cube_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_1darray:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2darray:
; GCN: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2darray_lwe:
; GCN: image_load v[0:4], v[{{[0-9]+:[0-9]+}}], s[0:7] dmask:0xf unorm lwe da{{$}}
define amdgpu_ps <4 x float> @load_2darray_lwe(<8 x i32> inreg %rsrc, i32 addrspace(1)* inreg %out, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 2, i32 0)
  %v.vec = extractvalue {<4 x float>, i32} %v, 0
  %v.err = extractvalue {<4 x float>, i32} %v, 1
  store i32 %v.err, i32 addrspace(1)* %out, align 4
  ret <4 x float> %v.vec
}

; GCN-LABEL: {{^}}load_2dmsaa:
; GCN: image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2darraymsaa:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; ----------------------------------------------------------------------------
; Stores with dmask 15, one function per image dimension.
; The four data registers are expected in v[0:3] and the coordinates in the
; registers that follow.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}store_1d:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2d:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
main_body:
  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_3d:
; GCN: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
main_body:
  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_cube:
; GCN: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_1darray:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2darray:
; GCN: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2dmsaa:
; GCN: image_store v[0:3], v[4:6], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2darraymsaa:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; ----------------------------------------------------------------------------
; Partial dmask values with scalar and two-element data types.
; The dmask operand of the intrinsic (8, 9, 2, 12) is expected to appear
; unchanged on the instruction, with a data register count that matches the
; IR data type.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}load_1d_V1:
; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; GCN-LABEL: {{^}}load_1d_V2:
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; GCN-LABEL: {{^}}store_1d_V1:
; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_1d_V2:
; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; ----------------------------------------------------------------------------
; Cache-policy modifiers selected by the last intrinsic operand:
; 1 is expected to produce "glc", 2 "slc", and 3 "glc slc".
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}load_1d_glc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1d_slc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1d_glc_slc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}store_1d_glc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret void
}

; GCN-LABEL: {{^}}store_1d_slc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret void
}

; GCN-LABEL: {{^}}store_1d_glc_slc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret void
}

; ----------------------------------------------------------------------------
; Store, then load, then store of the loaded data, each through a different
; resource descriptor. A "s_waitcnt vmcnt(0)" is expected between the load and
; the final store that consumes its result.
;
; NOTE(review): FileCheck is only given the GCN prefix on the command line, so
; the SI-prefixed line below is never evaluated. It is kept as found; confirm
; whether it should be dropped or turned into a negative check.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}image_store_wait:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf
; SI: s_waitcnt expcnt(0)
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
  %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
  ret void
}

; ----------------------------------------------------------------------------
; A 2D image load placed between two stores through the addrspace(3) pointer
; %lds. Only the image_load itself is checked.
; The label check is anchored with {{^}} and a trailing colon, like every other
; label in this file, so that it matches the function label rather than any
; earlier mention of the symbol name.
; ----------------------------------------------------------------------------

; GCN-LABEL: {{^}}image_load_mmo:
; GCN: image_load v1, v[2:3], s[0:7] dmask:0x1 unorm
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
  store float 0.000000e+00, float addrspace(3)* %lds
  %c0 = extractelement <2 x i32> %c, i32 0
  %c1 = extractelement <2 x i32> %c, i32 1
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
  store float 0.000000e+00, float addrspace(3)* %tmp2
  ret float %tex
}

; Intrinsic declarations: four-channel loads, plain and with the extra i32
; result.
declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {float,i32} @llvm.amdgcn.image.load.1d.f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {<2 x float>,i32} @llvm.amdgcn.image.load.1d.v2f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.1d.v4f32i32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2d.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.3d.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.cube.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.1darray.v4f32i32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darray.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2dmsaa.v4f32i32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare {<4 x float>,i32} @llvm.amdgcn.image.load.2darraymsaa.v4f32i32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

; Intrinsic declarations: four-channel stores.
declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

; Intrinsic declarations: scalar and two-element loads and stores.
declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0

; Attribute groups: #0 is used by the stores and by two test functions, #1 by
; the loads. Nothing visible in this file references #2.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }