1//===-------- amdgcn_smid.hip - AMDGCN smid implementation -------- HIP -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "target_impl.h"
10
// Partially derived from hcc_detail/device_functions.h
12
13// HW_ID Register bit structure
14// WAVE_ID     3:0     Wave buffer slot number. 0-9.
15// SIMD_ID     5:4     SIMD which the wave is assigned to within the CU.
16// PIPE_ID     7:6     Pipeline from which the wave was dispatched.
17// CU_ID       11:8    Compute Unit the wave is assigned to.
18// SH_ID       12      Shader Array (within an SE) the wave is assigned to.
19// SE_ID       14:13   Shader Engine the wave is assigned to.
20// TG_ID       19:16   Thread-group ID
21// VM_ID       23:20   Virtual Memory ID
22// QUEUE_ID    26:24   Queue from which this wave was dispatched.
23// STATE_ID    29:27   State ID (graphics only, not compute).
24// ME_ID       31:30   Micro-engine ID.
25
// Constants describing the HW_ID hardware register and the two bit fields of
// it that this file reads. Offsets are counted from bit 0 of the register;
// sizes are field widths in bits.
enum {
  // Identifier of the hardware register to read (HW_ID).
  HW_ID = 4,

  // CU_ID field.
  HW_ID_CU_ID_OFFSET = 8,
  HW_ID_CU_ID_SIZE = 4,

  // SE_ID field.
  HW_ID_SE_ID_OFFSET = 13,
  HW_ID_SE_ID_SIZE = 2,
};
35
// The s_getreg_b32 instruction, exposed as an intrinsic, takes a 16 bit
// immediate and returns a 32 bit value.
// The encoding of the immediate parameter is:
// ID           5:0     Which register to read from
// OFFSET       10:6    Range: 0..31
// WIDTH        15:11   Range: 1..32, stored as (width - 1)

// The asm equivalent is s_getreg_b32 %0, hwreg(HW_REG_HW_ID, Offset, Width)
// where hwreg forms a 16 bit immediate encoded by the assembler thus:
// uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
//   return (Id << 0) | (Offset << 6) | ((Width - 1) << 11);
// }
//
// ENCODE_HWREG builds that immediate from a field width in bits (WIDTH), the
// field's bit offset within the register (OFF) and the register id (REG).
// Every parameter is parenthesized so that arguments which are themselves
// expressions (e.g. `a | b`) are evaluated as a unit before being shifted.
#define ENCODE_HWREG(WIDTH, OFF, REG)                                          \
  ((REG) | ((OFF) << 6) | (((WIDTH) - 1) << 11))
49
50// Note: The results can be changed by a context switch
51// Return value in [0 2^SE_ID_SIZE * 2^CU_ID_SIZE), which is an upper
52// bound on how many compute units are available. Some values in this
53// range may never be returned if there are fewer than 2^CU_ID_SIZE CUs.
54
55DEVICE uint32_t __kmpc_impl_smid() {
56  uint32_t cu_id = __builtin_amdgcn_s_getreg(
57      ENCODE_HWREG(HW_ID_CU_ID_SIZE, HW_ID_CU_ID_OFFSET, HW_ID));
58  uint32_t se_id = __builtin_amdgcn_s_getreg(
59      ENCODE_HWREG(HW_ID_SE_ID_SIZE, HW_ID_SE_ID_OFFSET, HW_ID));
60  return (se_id << HW_ID_CU_ID_SIZE) + cu_id;
61}
62