; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Every kernel in this file calls exactly one work-item / work-group query
; intrinsic and stores the returned i32 to the global pointer %out.  The
; checks then pin where the generated code reads that value from:
;   * r600 (redwood) run: a MOV from a KC0 constant slot into the register
;     that is written by the MEM_RAT_CACHELESS store.
;   * amdgcn run: an s_load_dword from s[0:1] at a fixed offset, copied into
;     a VGPR that is written by buffer_store_dword; the tgid and tidig
;     kernels instead expect fixed input registers.

; --- Number of work-groups ---------------------------------------------------

; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].X

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @ngroups_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ngroups_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].Y

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @ngroups_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ngroups_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].Z

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @ngroups_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; --- Global size -------------------------------------------------------------

; FUNC-LABEL: {{^}}global_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[0].W

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @global_size_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].X

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @global_size_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].Y

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @global_size_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; --- Local (work-group) size -------------------------------------------------

; FUNC-LABEL: {{^}}local_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].Z

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @local_size_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[1].W

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @local_size_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].X

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @local_size_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; --- Work dimension ----------------------------------------------------------

; FUNC-LABEL: {{^}}get_work_dim:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV [[VAL]], KC0[2].Z

; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; SI: buffer_store_dword [[VVAL]]
define void @get_work_dim(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.AMDGPU.read.workdim() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; --- Work-group id -----------------------------------------------------------

; The tgid values are stored in sgprs offset by the number of user sgprs.
; Currently we always use exactly 2 user sgprs for the pointer to the
; kernel arguments, but this may change in the future.

; FUNC-LABEL: {{^}}tgid_x:
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s4
; SI: buffer_store_dword [[VVAL]]
define void @tgid_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}tgid_y:
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s5
; SI: buffer_store_dword [[VVAL]]
define void @tgid_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}tgid_z:
; SI: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6
; SI: buffer_store_dword [[VVAL]]
define void @tgid_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; --- Work-item id within the group -------------------------------------------

; The amdgcn checks below expect the id to be stored straight from v0, v1
; and v2 respectively, with no intermediate copy being matched.

; FUNC-LABEL: {{^}}tidig_x:
; SI: buffer_store_dword v0
define void @tidig_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}tidig_y:
; SI: buffer_store_dword v1
define void @tidig_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}tidig_z:
; SI: buffer_store_dword v2
define void @tidig_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; --- Intrinsic declarations --------------------------------------------------

declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

declare i32 @llvm.AMDGPU.read.workdim() #0

; All of the intrinsics above are declared and called as readnone.
attributes #0 = { readnone }