; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX7,UNPACKED-TID %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-xnack -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX90A,PACKED-TID %s

; Exercises the llvm.amdgcn.workitem.id.{x,y,z} intrinsics in non-kernel
; functions and in the kernels / functions that call them, under the two llc
; configurations above (kaveri and gfx90a).
;
; Comment lines that start with one of the check prefixes listed above are
; FileCheck directives, not documentation; edit them with the same care as code.

; Callees that read one or more ID components. The checks expect v_and_b32 with
; mask 0x3ff for X, and v_bfe_u32 with operands "10, 10" / "20, 10" for Y / Z,
; all taking v0 as the source register.

; GCN-LABEL: {{^}}use_workitem_id_x:
; GCN: s_waitcnt
; GCN: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v0
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_y:
; GCN: s_waitcnt
; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 10, 10
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_z:
; GCN: s_waitcnt
; GCN: v_bfe_u32 [[ID:v[0-9]+]], v0, 20, 10
; GCN-NEXT: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[ID]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_xy:
; GCN: s_waitcnt
; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_xyz:
; GCN: s_waitcnt
; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  store volatile i32 %val2, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_xz:
; GCN: s_waitcnt
; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v0
; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDX]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}use_workitem_id_yz:
; GCN: s_waitcnt
; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v0, 10, 10
; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v0, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDY]]
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]:[0-9]+\]}}, [[IDZ]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @use_workitem_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.y()
  %val1 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val0, i32 addrspace(1)* undef
  store volatile i32 %val1, i32 addrspace(1)* undef
  ret void
}

; Kernels that call the callees above. For the UNPACKED-TID configuration the
; checks expect shift / or instructions that build the value in v0 before the
; call; several kernels also check the .amdhsa_system_vgpr_workitem_id value.

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:

; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:

; GCN-NOT: v0
; GCN-NOT: v1
; UNPACKED-TID: v_lshlrev_b32_e32 v0, 10, v1
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v1
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:

; GCN-NOT: v0
; GCN-NOT: v2
; UNPACKED-TID: v_lshlrev_b32_e32 v0, 20, v2
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v1
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xy:
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v1
; UNPACKED-TID: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
; UNPACKED-TID: v_or_b32_e32 v0, v0, [[IDY]]
; GCN-NOT: v0
; GCN-NOT: v1
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xy() #1 {
  call void @use_workitem_id_xy()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xz:
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v2
; UNPACKED-TID: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
; UNPACKED-TID: v_or_b32_e32 v0, v0, [[IDZ]]
; GCN-NOT: v0
; GCN-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xz() #1 {
  call void @use_workitem_id_xz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_yz:
; UNPACKED-TID-NOT: v1
; UNPACKED-TID-NOT: v2
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
; UNPACKED-TID: v_or_b32_e32 v0, [[IDY]], [[IDZ]]
; GCN-NOT: v1
; GCN-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_yz() #1 {
  call void @use_workitem_id_yz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_xyz:
; UNPACKED-TID-NOT: v0
; UNPACKED-TID-NOT: v1
; UNPACKED-TID-NOT: v2
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDY:v[0-9]+]], 10, v1
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 [[IDZ:v[0-9]+]], 20, v2
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDY]]
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, [[IDZ]]
; GCN-NOT: v0
; GCN-NOT: v1
; GCN-NOT: v2
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workitem_id_xyz() #1 {
  call void @use_workitem_id_xyz()
  ret void
}

; Non-kernel callers of the same callees: the checks require that v0 is not
; mentioned before or after the call instruction.

; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_x() #1 {
  call void @use_workitem_id_x()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_y() #1 {
  call void @use_workitem_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
; GCN-NOT: v0
; GCN: s_swappc_b64
; GCN-NOT: v0
define void @func_indirect_use_workitem_id_z() #1 {
  call void @use_workitem_id_z()
  ret void
}

; Callees with one explicit i32 argument: the checks expect the argument to be
; stored from v0 and the ID component to be extracted from v1.

; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
; GCN: s_waitcnt
; GCN-DAG: v_and_b32_e32 [[ID:v[0-9]+]], 0x3ff, v1
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
; GCN: s_waitcnt
; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 10, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
; GCN: s_waitcnt
; GCN-DAG: v_bfe_u32 [[ID:v[0-9]+]], v1, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[ID]]
define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %val, i32 addrspace(1)* undef
  ret void
}


; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:

; GCN: v_mov_b32_e32 v1, v0
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
  call void @other_arg_use_workitem_id_x(i32 555)
  ret void
}


; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:

; UNPACKED-TID: v_lshlrev_b32_e32 v1, 10, v1
; PACKED-TID: v_mov_b32_e32 v1, v0
; GCN-NOT: v1
; GCN: v_mov_b32_e32 v0, 0x22b
; GCN-NOT: v1
; GCN: s_swappc_b64
; GCN-NOT: v0

; GCN: .amdhsa_system_vgpr_workitem_id 1
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
  call void @other_arg_use_workitem_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:

; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 20, v2
; PACKED-TID-DAG: v_mov_b32_e32 v1, v0
; GCN: s_swappc_b64
; GCN-NOT: v0

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
  call void @other_arg_use_workitem_id_z(i32 555)
  ret void
}

; Callee with 32 explicit i32 arguments: the checks expect the ID value to be
; loaded from the stack (buffer_load_dword relative to s32) into v32.

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
; GCN: buffer_load_dword v32, off, s[0:3], s32{{$}}
; GCN: v_and_b32_e32 v32, 0x3ff, v32
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, v32
; GCN: s_setpc_b64
define void @too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:

; GCN: s_mov_b32 s32, 0
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}

; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
; GCN: s_mov_b32 s33, s32
; GCN: buffer_store_dword v1, off, s[0:3], s32{{$}}
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void @too_many_args_use_workitem_id_x(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}

; Requires loading and storing to stack slot.
; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
; GCN-DAG: s_addk_i32 s32, 0x400{{$}}
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-DAG: buffer_load_dword v32, off, s[0:3], s33{{$}}

; GCN: buffer_store_dword v32, off, s[0:3], s32{{$}}

; GCN: s_swappc_b64

; GCN: s_addk_i32 s32, 0xfc00{{$}}
; GCN: buffer_load_dword v40, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN: s_setpc_b64
define void @too_many_args_call_too_many_args_use_workitem_id_x(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  call void @too_many_args_use_workitem_id_x(
    i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
  ret void
}

; stack layout:
; frame[0] = byval arg32
; frame[1] = stack passed workitem ID x
; frame[2] = VGPR spill slot

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
; GFX7: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GFX90A: buffer_load_dword v32, off, s[0:3], s32 offset:4
; GCN-DAG: s_waitcnt
; GFX7: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX90A: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32,
; GFX7: buffer_load_dword v0, off, s[0:3], s32 glc{{$}}
; GFX90A: buffer_load_dword v0, off, s[0:3], s32 glc{{$}}
; GCN: s_setpc_b64
define void @too_many_args_use_workitem_id_x_byval(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval(i32) %arg32) #1 {
  %val = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef
  %private = load volatile i32, i32 addrspace(5)* %arg32
  ret void
}

; sp[0] = byval
; sp[1] = ??
; sp[2] = stack passed workitem ID x

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}

; GCN: buffer_store_dword [[K]], off, s[0:3], 0 offset:4
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], 0 offset:4
; GCN: s_movk_i32 s32, 0x400
; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:4

; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 0
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 999, i32 addrspace(5)* %alloca
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320,
    i32 addrspace(5)* byval(i32) %alloca)
  ret void
}

; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
; GFX7: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
; GFX90A: buffer_store_dword [[K]], off, s[0:3], s33{{$}}
; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33{{$}}
; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32{{$}}
; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
; GCN: s_swappc_b64
define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 999, i32 addrspace(5)* %alloca
  call void @too_many_args_use_workitem_id_x_byval(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320,
    i32 addrspace(5)* byval(i32) %alloca)
  ret void
}

; NOTE(review): the two lines below that start with "GCN90A" and "GCN7" use
; prefixes that neither llc/FileCheck invocation at the top of this file
; enables, so FileCheck silently ignores them. They look like typos for the
; GFX90A / GFX7 prefixes - confirm against real llc output before renaming
; them, because enabling them changes what this test verifies.

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
; GFX90A: buffer_load_dword v32, off, s[0:3], s32{{$}}
; GFX90A: v_and_b32_e32 v33, 0x3ff, v32
; GFX90A: v_bfe_u32 v34, v32, 10, 10
; GCN90A: v_bfe_u32 v32, v32, 20, 10
; GFX7: buffer_load_dword v32, off, s[0:3], s32{{$}}
; GFX7: v_and_b32_e32 v33, 0x3ff, v32
; GFX7: v_bfe_u32 v33, v32, 10, 10
; GCN7: v_bfe_u32 v32, v32, 20, 10
; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v33{{$}}
; GFX7: flat_store_dword v{{\[[0-9]+:[0-9]+]}}, v32{{$}}
; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, v33, off{{$}}
; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, v34, off{{$}}
; GFX90A: global_store_dword v{{\[[0-9]+:[0-9]+]}}, v32, off{{$}}

; GFX7-COUNT-32: flat_store_dword v{{\[[0-9]+:[0-9]+]}}
; GFX90A-COUNT-32: global_store_dword v{{\[[0-9]+:[0-9]+]}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @too_many_args_use_workitem_id_xyz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef
  store volatile i32 %arg31, i32 addrspace(1)* undef

  ret void
}

; frame[0] = ID { Z, Y, X }

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:

; GCN-DAG: s_mov_b32 s32, 0

; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v2
; PACKED-TID-NOT: v0
; PACKED-TID-NOT: v1
; PACKED-TID-NOT: v2
; GCN: buffer_store_dword v0, off, s[0:3], s32{{$}}
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
  call void @too_many_args_use_workitem_id_xyz(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310, i32 320)
  ret void
}

; workitem ID X in register, yz on stack
; v31 = workitem ID X
; frame[0] = workitem { Z, Y, X }

; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
; GCN-DAG: v_and_b32_e32 [[IDX:v[0-9]+]], 0x3ff, v31
; GCN-DAG: {{flat|global}}_store_dword v[0:1], [[IDX]]
; GCN-DAG: v_bfe_u32 [[IDY:v[0-9]+]], v31, 10, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDY]]
; GCN-DAG: v_bfe_u32 [[IDZ:v[0-9]+]], v31, 20, 10
; GCN-DAG: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}, [[IDZ]]

; GCN-COUNT-31: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+]}}
; GCN-NEXT: s_waitcnt
; GCN: s_setpc_b64
; GCN: ScratchSize: 0
define void @too_many_args_use_workitem_id_x_stack_yz(
  i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
  i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
  i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
  i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
  %val0 = call i32 @llvm.amdgcn.workitem.id.x()
  store volatile i32 %val0, i32 addrspace(1)* undef
  %val1 = call i32 @llvm.amdgcn.workitem.id.y()
  store volatile i32 %val1, i32 addrspace(1)* undef
  %val2 = call i32 @llvm.amdgcn.workitem.id.z()
  store volatile i32 %val2, i32 addrspace(1)* undef

  store volatile i32 %arg0, i32 addrspace(1)* undef
  store volatile i32 %arg1, i32 addrspace(1)* undef
  store volatile i32 %arg2, i32 addrspace(1)* undef
  store volatile i32 %arg3, i32 addrspace(1)* undef
  store volatile i32 %arg4, i32 addrspace(1)* undef
  store volatile i32 %arg5, i32 addrspace(1)* undef
  store volatile i32 %arg6, i32 addrspace(1)* undef
  store volatile i32 %arg7, i32 addrspace(1)* undef

  store volatile i32 %arg8, i32 addrspace(1)* undef
  store volatile i32 %arg9, i32 addrspace(1)* undef
  store volatile i32 %arg10, i32 addrspace(1)* undef
  store volatile i32 %arg11, i32 addrspace(1)* undef
  store volatile i32 %arg12, i32 addrspace(1)* undef
  store volatile i32 %arg13, i32 addrspace(1)* undef
  store volatile i32 %arg14, i32 addrspace(1)* undef
  store volatile i32 %arg15, i32 addrspace(1)* undef

  store volatile i32 %arg16, i32 addrspace(1)* undef
  store volatile i32 %arg17, i32 addrspace(1)* undef
  store volatile i32 %arg18, i32 addrspace(1)* undef
  store volatile i32 %arg19, i32 addrspace(1)* undef
  store volatile i32 %arg20, i32 addrspace(1)* undef
  store volatile i32 %arg21, i32 addrspace(1)* undef
  store volatile i32 %arg22, i32 addrspace(1)* undef
  store volatile i32 %arg23, i32 addrspace(1)* undef

  store volatile i32 %arg24, i32 addrspace(1)* undef
  store volatile i32 %arg25, i32 addrspace(1)* undef
  store volatile i32 %arg26, i32 addrspace(1)* undef
  store volatile i32 %arg27, i32 addrspace(1)* undef
  store volatile i32 %arg28, i32 addrspace(1)* undef
  store volatile i32 %arg29, i32 addrspace(1)* undef
  store volatile i32 %arg30, i32 addrspace(1)* undef

  ret void
}

; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:

; GCN-NOT: v0
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v1, 10, v1
; UNPACKED-TID-DAG: v_or_b32_e32 v0, v0, v1
; UNPACKED-TID-DAG: v_lshlrev_b32_e32 v2, 20, v2
; UNPACKED-TID-DAG: v_or_b32_e32 v31, v0, v2
; PACKED-TID: v_mov_b32_e32 v31, v0

; GCN: s_mov_b32 s32, 0
; GCN: s_swappc_b64

; GCN: .amdhsa_system_vgpr_workitem_id 2
define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
  call void @too_many_args_use_workitem_id_x_stack_yz(
    i32 10, i32 20, i32 30, i32 40,
    i32 50, i32 60, i32 70, i32 80,
    i32 90, i32 100, i32 110, i32 120,
    i32 130, i32 140, i32 150, i32 160,
    i32 170, i32 180, i32 190, i32 200,
    i32 210, i32 220, i32 230, i32 240,
    i32 250, i32 260, i32 270, i32 280,
    i32 290, i32 300, i32 310)
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
declare i32 @llvm.amdgcn.workitem.id.z() #0

attributes #0 = { nounwind readnone speculatable "amdgpu-flat-work-group-size"="1,512" }
attributes #1 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }