1 /* 2 * Copyright © 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person 5 * obtaining a copy of this software and associated documentation 6 * files (the "Software"), to deal in the Software without 7 * restriction, including without limitation the rights to use, copy, 8 * modify, merge, publish, distribute, sublicense, and/or sell copies 9 * of the Software, and to permit persons to whom the Software is 10 * furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including 13 * the next paragraph) shall be included in all copies or substantial 14 * portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 20 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 21 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 * DEALINGS IN THE SOFTWARE. 
24 */ 25 26 #ifndef _HSAKMTTYPES_H_ 27 #define _HSAKMTTYPES_H_ 28 29 //the definitions and THUNK API are version specific - define the version numbers here 30 #define HSAKMT_VERSION_MAJOR 0 31 #define HSAKMT_VERSION_MINOR 99 32 33 34 #ifdef __cplusplus 35 extern "C" { 36 #endif 37 38 #if defined(_WIN64) || defined(_WINDOWS) || defined(_WIN32) 39 40 #if defined(_WIN32) 41 #define HSAKMTAPI __stdcall 42 #else 43 #define HSAKMTAPI 44 #endif 45 46 typedef unsigned char HSAuint8; 47 typedef char HSAint8; 48 typedef unsigned short HSAuint16; 49 typedef signed short HSAint16; 50 typedef unsigned __int32 HSAuint32; 51 typedef signed __int32 HSAint32; 52 typedef signed __int64 HSAint64; 53 typedef unsigned __int64 HSAuint64; 54 55 #elif defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__) 56 57 #include <stdbool.h> 58 #include <stdint.h> 59 60 #define HSAKMTAPI 61 62 typedef uint8_t HSAuint8; 63 typedef int8_t HSAint8; 64 typedef uint16_t HSAuint16; 65 typedef int16_t HSAint16; 66 typedef uint32_t HSAuint32; 67 typedef int32_t HSAint32; 68 typedef int64_t HSAint64; 69 typedef uint64_t HSAuint64; 70 71 #endif 72 73 typedef void* HSA_HANDLE; 74 typedef HSAuint64 HSA_QUEUEID; 75 76 // This is included in order to force the alignments to be 4 bytes so that 77 // it avoids extra padding added by the compiler when a 64-bit binary is generated. 
#pragma pack(push, hsakmttypes_h, 4)

//
// Status codes returned by the KFD interfaces.
// The numeric values are grouped in ranges with gaps between the groups.
//

typedef enum _HSAKMT_STATUS {
    // Operation successful
    HSAKMT_STATUS_SUCCESS                      = 0,
    // General error return if not otherwise specified
    HSAKMT_STATUS_ERROR                        = 1,
    // User mode component is not compatible with kernel HSA driver
    HSAKMT_STATUS_DRIVER_MISMATCH              = 2,

    // KFD identifies input parameters invalid
    HSAKMT_STATUS_INVALID_PARAMETER            = 3,
    // KFD identifies handle parameter invalid
    HSAKMT_STATUS_INVALID_HANDLE               = 4,
    // KFD identifies node or unit parameter invalid
    HSAKMT_STATUS_INVALID_NODE_UNIT            = 5,

    // No memory available (when allocating queues or memory)
    HSAKMT_STATUS_NO_MEMORY                    = 6,
    // A buffer needed to handle a request is too small
    HSAKMT_STATUS_BUFFER_TOO_SMALL             = 7,

    // KFD function is not implemented for this set of parameters
    HSAKMT_STATUS_NOT_IMPLEMENTED              = 10,
    // KFD function is not supported on this node
    HSAKMT_STATUS_NOT_SUPPORTED                = 11,
    // KFD function is not available currently on this node
    // (but may be at a later time)
    HSAKMT_STATUS_UNAVAILABLE                  = 12,

    // KFD driver path not opened
    HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED = 20,
    // User-kernel mode communication failure
    HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR   = 21,
    // KFD driver path already opened
    HSAKMT_STATUS_KERNEL_ALREADY_OPENED        = 22,
    // ATS/PRI 1.1 (Address Translation Services) not available
    // (IOMMU driver not installed or not available)
    HSAKMT_STATUS_HSAMMU_UNAVAILABLE           = 23,

    // The wait operation failed
    HSAKMT_STATUS_WAIT_FAILURE                 = 30,
    // The wait operation timed out
    HSAKMT_STATUS_WAIT_TIMEOUT                 = 31,

    // Memory buffer already registered
    HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED    = 35,
    // Memory buffer not registered
    HSAKMT_STATUS_MEMORY_NOT_REGISTERED        = 36,
    // Memory parameter not aligned
    HSAKMT_STATUS_MEMORY_ALIGNMENT             = 37,
} HSAKMT_STATUS;

//
// HSA KFD interface version
information. Calling software has to validate that it meets 119 // the minimum interface version as described in the API specification. 120 // All future structures will be extended in a backward compatible fashion. 121 // 122 123 typedef struct _HsaVersionInfo 124 { 125 HSAuint32 KernelInterfaceMajorVersion; // supported kernel interface major version 126 HSAuint32 KernelInterfaceMinorVersion; // supported kernel interface minor version 127 } HsaVersionInfo; 128 129 // 130 // HSA Topology Discovery Infrastructure structure definitions. 131 // The infrastructure implementation is based on design specified in the Kernel HSA Driver ADD 132 // The discoverable data is retrieved from ACPI structures in the platform infrastructure, as defined 133 // in the "Heterogeneous System Architecture Detail Topology" specification. 134 // 135 // The following structure is returned on a call to hsaKmtAcquireSystemProperties() as output. 136 // When the call is made within a process context, a "snapshot" of the topology information 137 // is taken within the KFD to avoid any changes during the enumeration process. 138 // The Snapshot is released when hsaKmtReleaseSystemProperties() is called 139 // or when the process exits or is terminated. 140 // 141 142 typedef struct _HsaSystemProperties 143 { 144 HSAuint32 NumNodes; // the number of "H-NUMA" memory nodes. 
145 // each node represents a discoverable node of the system 146 // All other enumeration is done on a per-node basis 147 148 HSAuint32 PlatformOem; // identifies HSA platform, reflects the OEMID in the CRAT 149 HSAuint32 PlatformId; // HSA platform ID, reflects OEM TableID in the CRAT 150 HSAuint32 PlatformRev; // HSA platform revision, reflects Platform Table Revision ID 151 } HsaSystemProperties; 152 153 typedef union 154 { 155 HSAuint32 Value; 156 struct 157 { 158 unsigned int uCode : 10; // ucode packet processor version 159 unsigned int Major : 6; // GFXIP Major engine version 160 unsigned int Minor : 8; // GFXIP Minor engine version 161 unsigned int Stepping : 8; // GFXIP Stepping info 162 }ui32; 163 } HSA_ENGINE_ID; 164 165 typedef union 166 { 167 HSAuint32 Value; 168 struct 169 { 170 unsigned int uCodeSDMA: 10; // ucode version SDMA engine 171 unsigned int uCodeRes : 10; // ucode version (reserved) 172 unsigned int Reserved : 12; // Reserved, must be 0 173 }; 174 } HSA_ENGINE_VERSION; 175 176 typedef union 177 { 178 HSAuint32 Value; 179 struct 180 { 181 unsigned int HotPluggable : 1; // the node may be removed by some system action 182 // (event will be sent) 183 unsigned int HSAMMUPresent : 1; // This node has an ATS/PRI 1.1 compatible 184 // translation agent in the system (e.g. IOMMUv2) 185 unsigned int SharedWithGraphics : 1; // this HSA nodes' GPU function is also used for OS primary 186 // graphics render (= UI) 187 unsigned int QueueSizePowerOfTwo : 1; // This node GPU requires the queue size to be a power of 2 value 188 unsigned int QueueSize32bit : 1; // This node GPU requires the queue size to be less than 4GB 189 unsigned int QueueIdleEvent : 1; // This node GPU supports notification on Queue Idle 190 unsigned int VALimit : 1; // This node GPU has limited VA range for platform 191 // (typical 40bit). Affects shared VM use for 64bit apps 192 unsigned int WatchPointsSupported: 1; // Indicates if Watchpoints are available on the node. 
193 unsigned int WatchPointsTotalBits: 4; // ld(Watchpoints) available. To determine the number use 2^value 194 195 unsigned int DoorbellType : 2; // 0: This node has pre-1.0 doorbell characteristic 196 // 1: This node has 1.0 doorbell characteristic 197 // 2,3: reserved for future use 198 unsigned int AQLQueueDoubleMap : 1; // The unit needs a VA “double map” 199 unsigned int Reserved : 17; 200 } ui32; 201 } HSA_CAPABILITY; 202 203 204 // 205 // HSA node properties. This structure is an output parameter of hsaKmtGetNodeProperties() 206 // The application or runtime can use the information herein to size the topology management structures 207 // Unless there is some very weird setup, there is at most one "GPU" device (with a certain number 208 // of throughput compute units (= SIMDs) associated with a H-NUMA node. 209 // 210 211 #define HSA_PUBLIC_NAME_SIZE 64 // Marketing name string size 212 213 typedef struct _HsaNodeProperties 214 { 215 HSAuint32 NumCPUCores; // # of latency (= CPU) cores present on this HSA node. 216 // This value is 0 for a HSA node with no such cores, 217 // e.g a "discrete HSA GPU" 218 HSAuint32 NumFComputeCores; // # of HSA throughtput (= GPU) FCompute cores ("SIMD") present in a node. 219 // This value is 0 if no FCompute cores are present (e.g. pure "CPU node"). 220 HSAuint32 NumMemoryBanks; // # of discoverable memory bank affinity properties on this "H-NUMA" node. 221 HSAuint32 NumCaches; // # of discoverable cache affinity properties on this "H-NUMA" node. 222 223 HSAuint32 NumIOLinks; // # of discoverable IO link affinity properties of this node 224 // connecting to other nodes. 
225 226 HSAuint32 CComputeIdLo; // low value of the logical processor ID of the latency (= CPU) 227 // cores available on this node 228 HSAuint32 FComputeIdLo; // low value of the logical processor ID of the throughput (= GPU) 229 // units available on this node 230 231 HSA_CAPABILITY Capability; // see above 232 233 HSAuint32 MaxWavesPerSIMD; // This identifies the max. number of launched waves per SIMD. 234 // If NumFComputeCores is 0, this value is ignored. 235 HSAuint32 LDSSizeInKB; // Size of Local Data Store in Kilobytes per SIMD Wavefront 236 HSAuint32 GDSSizeInKB; // Size of Global Data Store in Kilobytes shared across SIMD Wavefronts 237 238 HSAuint32 WaveFrontSize; // Number of SIMD cores per wavefront executed, typically 64, 239 // may be 32 or a different value for some HSA based architectures 240 241 HSAuint32 NumShaderBanks; // Number of Shader Banks or Shader Engines, typical values are 1 or 2 242 243 244 HSAuint32 NumArrays; // Number of SIMD arrays per engine 245 HSAuint32 NumCUPerArray; // Number of Compute Units (CU) per SIMD array 246 HSAuint32 NumSIMDPerCU; // Number of SIMD representing a Compute Unit (CU) 247 248 HSAuint32 MaxSlotsScratchCU; // Number of temp. 
memory ("scratch") wave slots available to access, 249 // may be 0 if HW has no restrictions 250 251 HSA_ENGINE_ID EngineId; // Identifier (rev) of the GPU uEngine or Firmware, may be 0 252 253 HSAuint16 VendorId; // GPU vendor id; 0 on latency (= CPU)-only nodes 254 HSAuint16 DeviceId; // GPU device id; 0 on latency (= CPU)-only nodes 255 256 HSAuint32 LocationId; // GPU BDF (Bus/Device/function number) - identifies the device 257 // location in the overall system 258 HSAuint64 LocalMemSize; // Local memory size 259 HSAuint32 MaxEngineClockMhzFCompute; // maximum engine clocks for CPU and 260 HSAuint32 MaxEngineClockMhzCCompute; // GPU function, including any boost caopabilities, 261 HSAint32 DrmRenderMinor; // DRM render device minor device number 262 HSAuint16 MarketingName[HSA_PUBLIC_NAME_SIZE]; // Public name of the "device" on the node (board or APU name). 263 // Unicode string 264 HSAuint8 AMDName[HSA_PUBLIC_NAME_SIZE]; //CAL Name of the "device", ASCII 265 HSA_ENGINE_VERSION uCodeEngineVersions; 266 HSAuint8 Reserved[60]; 267 } HsaNodeProperties; 268 269 270 typedef enum _HSA_HEAPTYPE 271 { 272 HSA_HEAPTYPE_SYSTEM = 0, 273 HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC = 1, // CPU "visible" part of GPU device local memory (for discrete GPU) 274 HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE = 2, // CPU "invisible" part of GPU device local memory (for discrete GPU) 275 // All HSA accessible memory is per definition "CPU visible" 276 // "Private memory" is relevant for graphics interop only. 
277 HSA_HEAPTYPE_GPU_GDS = 3, // GPU internal memory (GDS) 278 HSA_HEAPTYPE_GPU_LDS = 4, // GPU internal memory (LDS) 279 HSA_HEAPTYPE_GPU_SCRATCH = 5, // GPU special memory (scratch) 280 HSA_HEAPTYPE_DEVICE_SVM = 6, // sys-memory mapped by device page tables 281 282 HSA_HEAPTYPE_NUMHEAPTYPES, 283 HSA_HEAPTYPE_SIZE = 0xFFFFFFFF 284 } HSA_HEAPTYPE; 285 286 typedef union 287 { 288 HSAuint32 MemoryProperty; 289 struct 290 { 291 unsigned int HotPluggable : 1; // the memory may be removed by some system action, 292 // memory should be used for temporary data 293 unsigned int NonVolatile : 1; // memory content is preserved across a power-off cycle. 294 unsigned int Reserved :30; 295 } ui32; 296 } HSA_MEMORYPROPERTY; 297 298 299 // 300 // Discoverable HSA Memory properties. 301 // The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function 302 // 303 304 typedef struct _HsaMemoryProperties 305 { 306 HSA_HEAPTYPE HeapType; // system or frame buffer, 307 union 308 { 309 HSAuint64 SizeInBytes; // physical memory size of the memory range in bytes 310 struct 311 { 312 HSAuint32 SizeInBytesLow; // physical memory size of the memory range in bytes (lower 32bit) 313 HSAuint32 SizeInBytesHigh; // physical memory size of the memory range in bytes (higher 32bit) 314 } ui32; 315 }; 316 HSA_MEMORYPROPERTY Flags; // See definitions above 317 318 HSAuint32 Width; // memory width - the number of parallel bits of the memory interface 319 HSAuint32 MemoryClockMax; // memory clock for the memory, this allows computing the available bandwidth 320 // to the memory when needed 321 HSAuint64 VirtualBaseAddress; // if set to value != 0, indicates the virtual base address of the memory 322 // in process virtual space 323 } HsaMemoryProperties; 324 325 // 326 // Discoverable Cache Properties. (optional). 
327 // The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function 328 // Any of the parameters may be 0 (= not defined) 329 // 330 331 #define HSA_CPU_SIBLINGS 256 332 #define HSA_PROCESSORID_ALL 0xFFFFFFFF 333 334 typedef union 335 { 336 HSAuint32 Value; 337 struct 338 { 339 unsigned int Data : 1; 340 unsigned int Instruction : 1; 341 unsigned int CPU : 1; 342 unsigned int HSACU : 1; 343 unsigned int Reserved :28; 344 } ui32; 345 } HsaCacheType; 346 347 typedef struct _HaCacheProperties 348 { 349 HSAuint32 ProcessorIdLow; // Identifies the processor number 350 351 HSAuint32 CacheLevel; // Integer representing level: 1, 2, 3, 4, etc 352 HSAuint32 CacheSize; // Size of the cache 353 HSAuint32 CacheLineSize; // Cache line size in bytes 354 HSAuint32 CacheLinesPerTag; // Cache lines per Cache Tag 355 HSAuint32 CacheAssociativity; // Cache Associativity 356 HSAuint32 CacheLatency; // Cache latency in ns 357 HsaCacheType CacheType; 358 HSAuint32 SiblingMap[HSA_CPU_SIBLINGS]; 359 } HsaCacheProperties; 360 361 362 // 363 // Discoverable CPU Compute Properties. (optional). 364 // The structure is the output parameter of the hsaKmtGetCComputeProperties() function 365 // Any of the parameters may be 0 (= not defined) 366 // 367 368 typedef struct _HsaCComputeProperties 369 { 370 HSAuint32 SiblingMap[HSA_CPU_SIBLINGS]; 371 } HsaCComputeProperties; 372 373 // 374 // Discoverable IoLink Properties (optional). 375 // The structure is the output parameter of the hsaKmtGetIoLinkProperties() function. 
376 // Any of the parameters may be 0 (= not defined) 377 // 378 379 typedef enum _HSA_IOLINKTYPE { 380 HSA_IOLINKTYPE_UNDEFINED = 0, 381 HSA_IOLINKTYPE_HYPERTRANSPORT = 1, 382 HSA_IOLINKTYPE_PCIEXPRESS = 2, 383 HSA_IOLINKTYPE_AMBA = 3, 384 HSA_IOLINKTYPE_MIPI = 4, 385 HSA_IOLINK_TYPE_QPI_1_1 = 5, 386 HSA_IOLINK_TYPE_RESERVED1 = 6, 387 HSA_IOLINK_TYPE_RESERVED2 = 7, 388 HSA_IOLINK_TYPE_RAPID_IO = 8, 389 HSA_IOLINK_TYPE_INFINIBAND = 9, 390 HSA_IOLINK_TYPE_RESERVED3 = 10, 391 HSA_IOLINKTYPE_OTHER = 11, 392 HSA_IOLINKTYPE_NUMIOLINKTYPES, 393 HSA_IOLINKTYPE_SIZE = 0xFFFFFFFF 394 } HSA_IOLINKTYPE; 395 396 typedef union 397 { 398 HSAuint32 LinkProperty; 399 struct 400 { 401 unsigned int Override : 1; // bus link properties are determined by this structure 402 // not by the HSA_IOLINKTYPE. The other flags are valid 403 // only if this bit is set to one 404 unsigned int NonCoherent : 1; // The link doesn't support coherent transactions 405 // memory accesses across must not be set to "host cacheable"! 
406 unsigned int NoAtomics32bit : 1; // The link doesn't support 32bit-wide atomic transactions 407 unsigned int NoAtomics64bit : 1; // The link doesn't support 64bit-wide atomic transactions 408 unsigned int NoPeerToPeerDMA : 1; // The link doesn't allow device P2P access 409 unsigned int Reserved :27; 410 } ui32; 411 } HSA_LINKPROPERTY; 412 413 414 typedef struct _HsaIoLinkProperties 415 { 416 HSA_IOLINKTYPE IoLinkType; // see above 417 HSAuint32 VersionMajor; // Bus interface version (optional) 418 HSAuint32 VersionMinor; // Bus interface version (optional) 419 420 HSAuint32 NodeFrom; // 421 HSAuint32 NodeTo; // 422 423 HSAuint32 Weight; // weight factor (derived from CDIT) 424 425 HSAuint32 MinimumLatency; // minimum cost of time to transfer (rounded to ns) 426 HSAuint32 MaximumLatency; // maximum cost of time to transfer (rounded to ns) 427 HSAuint32 MinimumBandwidth; // minimum interface Bandwidth in MB/s 428 HSAuint32 MaximumBandwidth; // maximum interface Bandwidth in MB/s 429 HSAuint32 RecTransferSize; // recommended transfer size to reach maximum bandwidth in Bytes 430 HSA_LINKPROPERTY Flags; // override flags (may be active for specific platforms) 431 } HsaIoLinkProperties; 432 433 // 434 // Memory allocation definitions for the KFD HSA interface 435 // 436 437 typedef struct _HsaMemFlags 438 { 439 union 440 { 441 struct 442 { 443 unsigned int NonPaged : 1; // default = 0: pageable memory 444 unsigned int CachePolicy : 2; // see HSA_CACHING_TYPE 445 unsigned int ReadOnly : 1; // default = 0: Read/Write memory 446 unsigned int PageSize : 2; // see HSA_PAGE_SIZE 447 unsigned int HostAccess : 1; // default = 0: GPU access only 448 unsigned int NoSubstitute: 1; // default = 0: if specific memory is not available on node (e.g. on 449 // discrete GPU local), allocation may fall back to system memory node 0 450 // memory (= always available). Otherwise no allocation is possible. 
451 unsigned int GDSMemory : 1; // default = 0: If set, the allocation will occur in GDS heap. 452 // HostAccess must be 0, all other flags (except NoSubstitute) should 453 // be 0 when setting this entry to 1. GDS allocation may fail due to 454 // limited resources. Application code is required to work without 455 // any allocated GDS memory using regular memory. 456 // Allocation fails on any node without GPU function. 457 unsigned int Scratch : 1; // default = 0: If set, the allocation will occur in GPU "scratch area". 458 // HostAccess must be 0, all other flags (except NoSubstitute) should be 0 459 // when setting this entry to 1. Scratch allocation may fail due to limited 460 // resources. Application code is required to work without any allocation. 461 // Allocation fails on any node without GPU function. 462 unsigned int AtomicAccessFull: 1; // default = 0: If set, the memory will be allocated and mapped to allow 463 // atomic ops processing. On AMD APU, this will use the ATC path on system 464 // memory, irrespective of the NonPaged flag setting (= if NonPaged is set, 465 // the memory is pagelocked but mapped through IOMMUv2 instead of GPUVM). 466 // All atomic ops must be supported on this memory. 467 unsigned int AtomicAccessPartial: 1; // default = 0: See above for AtomicAccessFull description, however 468 // focused on AMD discrete GPU that support PCIe atomics; the memory 469 // allocation is mapped to allow for PCIe atomics to operate on system 470 // memory, irrespective of NonPaged set or the presence of an ATC path 471 // in the system. The atomic operations supported are limited to SWAP, 472 // CompareAndSwap (CAS) and FetchAdd (this PCIe op allows both atomic 473 // increment and decrement via 2-complement arithmetic), which are the 474 // only atomic ops directly supported in PCI Express. 
475 // On AMD APU, setting this flag will allocate the same type of memory 476 // as AtomicAccessFull, but it will be considered compatible with 477 // discrete GPU atomic operations access. 478 unsigned int ExecuteAccess: 1; // default = 0: Identifies if memory is primarily used for data or accessed 479 // for executable code (e.g. queue memory) by the host CPU or the device. 480 // Influences the page attribute setting within the allocation 481 unsigned int CoarseGrain : 1; // default = 0: The memory can be accessed assuming cache 482 // coherency maintained by link infrastructure and HSA agents. 483 // 1: memory consistency needs to be enforced at 484 // synchronization points at dispatch or other software 485 // enforced synchronization boundaries. 486 unsigned int AQLQueueMemory: 1; // default = 0; If 1: The caller indicates that the memory will be used as AQL queue memory. 487 // The KFD will ensure that the memory returned is allocated in the optimal memory location 488 // and optimal alignment requirements 489 unsigned int Reserved : 17; 490 491 } ui32; 492 HSAuint32 Value; 493 }; 494 } HsaMemFlags; 495 496 typedef struct _HsaMemMapFlags 497 { 498 union 499 { 500 struct 501 { 502 unsigned int Reserved1 : 1; // 503 unsigned int CachePolicy : 2; // see HSA_CACHING_TYPE 504 unsigned int ReadOnly : 1; // memory is not modified while mapped 505 // allows migration scale-out 506 unsigned int PageSize : 2; // see HSA_PAGE_SIZE, hint to use 507 // this page size if possible and 508 // smaller than default 509 unsigned int HostAccess : 1; // default = 0: GPU access only 510 unsigned int Migrate : 1; // Hint: Allows migration to local mem 511 // of mapped GPU(s), instead of mapping 512 // physical location 513 unsigned int Probe : 1; // default = 0: Indicates that a range 514 // will be mapped by the process soon, 515 // but does not initiate a map operation 516 // may trigger eviction of nonessential 517 // data from the memory, reduces latency 518 // “cleanup hint” 
only, may be ignored 519 unsigned int Reserved : 23; 520 } ui32; 521 HSAuint32 Value; 522 }; 523 } HsaMemMapFlags; 524 525 typedef struct _HsaGraphicsResourceInfo { 526 void *MemoryAddress; // For use in hsaKmtMapMemoryToGPU(Nodes) 527 HSAuint64 SizeInBytes; // Buffer size 528 const void *Metadata; // Pointer to metadata owned by Thunk 529 HSAuint32 MetadataSizeInBytes; // Size of metadata 530 HSAuint32 Reserved; // Reserved for future use, will be set to 0 531 } HsaGraphicsResourceInfo; 532 533 typedef enum _HSA_CACHING_TYPE 534 { 535 HSA_CACHING_CACHED = 0, 536 HSA_CACHING_NONCACHED = 1, 537 HSA_CACHING_WRITECOMBINED = 2, 538 HSA_CACHING_RESERVED = 3, 539 HSA_CACHING_NUM_CACHING, 540 HSA_CACHING_SIZE = 0xFFFFFFFF 541 } HSA_CACHING_TYPE; 542 543 typedef enum _HSA_PAGE_SIZE 544 { 545 HSA_PAGE_SIZE_4KB = 0, 546 HSA_PAGE_SIZE_64KB = 1, //64KB pages, not generally available in systems 547 HSA_PAGE_SIZE_2MB = 2, 548 HSA_PAGE_SIZE_1GB = 3, //1GB pages, not generally available in systems 549 } HSA_PAGE_SIZE; 550 551 552 typedef enum _HSA_DEVICE 553 { 554 HSA_DEVICE_CPU = 0, 555 HSA_DEVICE_GPU = 1, 556 MAX_HSA_DEVICE = 2 557 } HSA_DEVICE; 558 559 560 typedef enum _HSA_QUEUE_PRIORITY 561 { 562 HSA_QUEUE_PRIORITY_MINIMUM = -3, 563 HSA_QUEUE_PRIORITY_LOW = -2, 564 HSA_QUEUE_PRIORITY_BELOW_NORMAL = -1, 565 HSA_QUEUE_PRIORITY_NORMAL = 0, 566 HSA_QUEUE_PRIORITY_ABOVE_NORMAL = 1, 567 HSA_QUEUE_PRIORITY_HIGH = 2, 568 HSA_QUEUE_PRIORITY_MAXIMUM = 3, 569 HSA_QUEUE_PRIORITY_NUM_PRIORITY, 570 HSA_QUEUE_PRIORITY_SIZE = 0xFFFFFFFF 571 } HSA_QUEUE_PRIORITY; 572 573 typedef enum _HSA_QUEUE_TYPE 574 { 575 HSA_QUEUE_COMPUTE = 1, // AMD PM4 compatible Compute Queue 576 HSA_QUEUE_SDMA = 2, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc). 
577 HSA_QUEUE_MULTIMEDIA_DECODE = 3, // reserved, for HSA multimedia decode queue 578 HSA_QUEUE_MULTIMEDIA_ENCODE = 4, // reserved, for HSA multimedia encode queue 579 580 // the following values indicate a queue type permitted to reference OS graphics 581 // resources through the interoperation API. See [5] "HSA Graphics Interoperation 582 // specification" for more details on use of such resources. 583 584 HSA_QUEUE_COMPUTE_OS = 11, // AMD PM4 compatible Compute Queue 585 HSA_QUEUE_SDMA_OS = 12, // SDMA Queue, used for data transport and format conversion (e.g. (de-)tiling, etc). 586 HSA_QUEUE_MULTIMEDIA_DECODE_OS = 13, // reserved, for HSA multimedia decode queue 587 HSA_QUEUE_MULTIMEDIA_ENCODE_OS = 14, // reserved, for HSA multimedia encode queue 588 589 HSA_QUEUE_COMPUTE_AQL = 21, // HSA AQL packet compatible Compute Queue 590 HSA_QUEUE_DMA_AQL = 22, // HSA AQL packet compatible DMA Queue 591 592 // more types in the future 593 594 HSA_QUEUE_TYPE_SIZE = 0xFFFFFFFF //aligns to 32bit enum 595 } HSA_QUEUE_TYPE; 596 597 typedef struct 598 { 599 HSAuint32 QueueDetailError; // HW specific queue error state 600 HSAuint32 QueueTypeExtended; // HW specific queue type info. 601 // 0 = no information 602 HSAuint32 NumCUAssigned; // size of *CUMaskInfo bit array, Multiple 603 // of 32, 0 = no information 604 HSAuint32* CUMaskInfo; // runtime/system CU assignment for realtime 605 // queue & reserved CU priority. Ptr to 606 // bit-array, each bit represents one CU. 
// NULL = no information
    HSAuint32*  UserContextSaveArea;      // reference to user space context save area
    HSAuint64   SaveAreaSizeInBytes;      // Must be 4-Byte aligned
    HSAuint32*  ControlStackTop;          // ptr to the TOS
    HSAuint64   ControlStackUsedInBytes;  // Must be 4-Byte aligned
    HSAuint64   Reserved1;                // runtime/system CU assignment
    HSAuint64   Reserved2;                // runtime/system CU assignment
} HsaQueueInfo;

// Resources describing one queue: its ID plus the doorbell, write-pointer and
// read-pointer locations. Each location is an anonymous union exposing the same
// storage as a 32-bit pointer, a 64-bit (*_aql) pointer, or a raw 64-bit value;
// the HSAuint64 member keeps every union 8 bytes wide regardless of pointer size.
typedef struct _HsaQueueResource
{
    HSA_QUEUEID     QueueId;    /** queue ID */
    /** Doorbell address to notify HW of a new dispatch */
    union
    {
        HSAuint32*  Queue_DoorBell;
        HSAuint64*  Queue_DoorBell_aql;
        HSAuint64   QueueDoorBell;
    };

    /** virtual address to notify HW of queue write ptr value */
    union
    {
        HSAuint32*  Queue_write_ptr;
        HSAuint64*  Queue_write_ptr_aql;
        HSAuint64   QueueWptrValue;
    };

    /** virtual address updated by HW to indicate current read location */
    union
    {
        HSAuint32*  Queue_read_ptr;
        HSAuint64*  Queue_read_ptr_aql;
        HSAuint64   QueueRptrValue;
    };

} HsaQueueResource;


//TEMPORARY structure definition - to be used only on "Trinity + Southern Islands" platform
typedef struct _HsaQueueReport
{
    HSAuint32     VMID;         //Required on SI to dispatch IB in primary ring
    void*         QueueAddress; //virtual address of UM mapped compute ring
    HSAuint64     QueueSize;    //size of the UM mapped compute ring
} HsaQueueReport;



// Operations that can be applied to a wavefront. Values start at 1 (there is no
// operation 0), so HSA_DBG_NUM_WAVEOP equals the highest valid operation value.
typedef enum _HSA_DBG_WAVEOP
{
    HSA_DBG_WAVEOP_HALT     = 1,    //Halts a wavefront
    HSA_DBG_WAVEOP_RESUME   = 2,    //Resumes a wavefront
    HSA_DBG_WAVEOP_KILL     = 3,    //Kills a wavefront
    HSA_DBG_WAVEOP_DEBUG    = 4,    //Causes wavefront to enter debug mode
    HSA_DBG_WAVEOP_TRAP     = 5,    //Causes wavefront to take a trap
    HSA_DBG_NUM_WAVEOP      = 5,    //count of operations; aliases HSA_DBG_WAVEOP_TRAP
    HSA_DBG_MAX_WAVEOP      = 0xFFFFFFFF    //sentinel; presumably forces a 32-bit enum, as
                                            //documented for HSA_PROFILE_TYPE_SIZE in this file
} HSA_DBG_WAVEOP;

// Selects which wavefronts a wave operation is sent to. Value 1 is unused.
typedef enum _HSA_DBG_WAVEMODE
{
    HSA_DBG_WAVEMODE_SINGLE               = 0,  //send command to a single wave
    //Broadcast to all wavefronts of all processes is not supported for HSA user mode
    //NOTE(review): presumably this is why value 1 is skipped - confirm
    HSA_DBG_WAVEMODE_BROADCAST_PROCESS    = 2,  //send to waves within current process
    HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,  //send to waves within current process on CU
    HSA_DBG_NUM_WAVEMODE                  = 3,  //aliases the highest mode value, not a count of modes
    HSA_DBG_MAX_WAVEMODE                  = 0xFFFFFFFF
} HSA_DBG_WAVEMODE;


// Wave message types. HSA_DBG_NUM_WAVEMSG takes the next implicit value (3).
typedef enum _HSA_DBG_WAVEMSG_TYPE
{
    HSA_DBG_WAVEMSG_AUTO    = 0,
    HSA_DBG_WAVEMSG_USER    = 1,
    HSA_DBG_WAVEMSG_ERROR   = 2,
    HSA_DBG_NUM_WAVEMSG,
    HSA_DBG_MAX_WAVEMSG     = 0xFFFFFFFF
} HSA_DBG_WAVEMSG_TYPE;

// Access kinds a watch can react to. HSA_DBG_WATCH_NUM takes the next implicit value (4).
typedef enum _HSA_DBG_WATCH_MODE
{
    HSA_DBG_WATCH_READ      = 0,    //Read operations only
    HSA_DBG_WATCH_NONREAD   = 1,    //Write or Atomic operations only
    HSA_DBG_WATCH_ATOMIC    = 2,    //Atomic Operations only
    HSA_DBG_WATCH_ALL       = 3,    //Read, Write or Atomic operations
    HSA_DBG_WATCH_NUM,
    HSA_DBG_WATCH_SIZE      = 0xFFFFFFFF
} HSA_DBG_WATCH_MODE;


//This structure is hardware specific and may change in the future
typedef struct _HsaDbgWaveMsgAMDGen2
{
    HSAuint32      Value;
    HSAuint32      Reserved2;

} HsaDbgWaveMsgAMDGen2;

// Union over the hardware-generation specific wave message layouts;
// only the Gen2 layout is defined so far.
typedef union _HsaDbgWaveMessageAMD
{
    HsaDbgWaveMsgAMDGen2    WaveMsgInfoGen2;
    //for future HsaDbgWaveMsgAMDGen3;
} HsaDbgWaveMessageAMD;

// A wave message: a pointer to associated data plus the hardware-specific payload.
typedef struct _HsaDbgWaveMessage
{
    void*                   MemoryVA;   // ptr to associated host-accessible data
    HsaDbgWaveMessageAMD    DbgWaveMsg;
} HsaDbgWaveMessage;


//
// HSA sync primitive, Event and HW Exception notification API definitions
// The API functions allow the runtime to define a so-called sync-primitive, a SW object
// combining a user-mode provided "syncvar" and a scheduler event that can be signaled
// through a defined GPU interrupt. A syncvar is a process virtual memory location of
// a certain size that can be accessed by CPU and GPU shader code within the process to set
// and query the content within that memory.
// The definition of the content is determined by
// the HSA runtime and potentially GPU shader code interfacing with the HSA runtime.
// The syncvar values may be commonly written through a PM4 WRITE_DATA packet in the
// user mode instruction stream.
// The OS scheduler event is typically associated and signaled by an interrupt issued by
// the GPU, but other HSA system interrupt conditions from other HW (e.g. IOMMUv2) may be
// surfaced by the KFD by this mechanism, too.
//

// these are the new definitions for events
// HSA_EVENTTYPE_MAXID takes the next implicit value (9), i.e. one past the last defined type.
typedef enum _HSA_EVENTTYPE
{
    HSA_EVENTTYPE_SIGNAL            = 0, //user-mode generated GPU signal
    HSA_EVENTTYPE_NODECHANGE        = 1, //HSA node change (attach/detach)
    HSA_EVENTTYPE_DEVICESTATECHANGE = 2, //HSA device state change( start/stop )
    HSA_EVENTTYPE_HW_EXCEPTION      = 3, //GPU shader exception event
    HSA_EVENTTYPE_SYSTEM_EVENT      = 4, //GPU SYSCALL with parameter info
    HSA_EVENTTYPE_DEBUG_EVENT       = 5, //GPU signal for debugging
    HSA_EVENTTYPE_PROFILE_EVENT     = 6, //GPU signal for profiling
    HSA_EVENTTYPE_QUEUE_EVENT       = 7, //GPU signal queue idle state (EOP pm4)
    HSA_EVENTTYPE_MEMORY            = 8, //GPU signal for signaling memory access faults and memory subsystem issues
    //...
    HSA_EVENTTYPE_MAXID,
    HSA_EVENTTYPE_TYPE_SIZE         = 0xFFFFFFFF
} HSA_EVENTTYPE;

// Identifier of an event (32-bit).
typedef HSAuint32  HSA_EVENTID;

//
// Subdefinitions for various event types: Syncvar
//

// A syncvar: a user-mode location (viewable either as a pointer or as a raw
// 64-bit value through the SyncVar union) plus its size.
typedef struct _HsaSyncVar
{
    union
    {
        void*       UserData;           //pointer to user mode data
        HSAuint64   UserDataPtrValue;   //64bit compatibility of value
    } SyncVar;
    HSAuint64       SyncVarSize;
} HsaSyncVar;

//
// Subdefinitions for various event types: NodeChange
//

typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS
{
    HSA_EVENTTYPE_NODECHANGE_ADD    = 0,
    HSA_EVENTTYPE_NODECHANGE_REMOVE = 1,
    HSA_EVENTTYPE_NODECHANGE_SIZE   = 0xFFFFFFFF
} HSA_EVENTTYPE_NODECHANGE_FLAGS;

typedef struct _HsaNodeChange
{
    HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;   // HSA node added/removed on the platform
} HsaNodeChange;

//
// Sub-definitions for various event types: DeviceStateChange
//

// Note: the enumerators are spelled DEVICESTATUSCHANGE while the type name is
// DEVICESTATECHANGE; both spellings are part of the existing API.
typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS
{
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_START = 0, //device started (and available)
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP  = 1, //device stopped (i.e. unavailable)
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE  = 0xFFFFFFFF
} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS;

typedef struct _HsaDeviceStateChange
{
    HSAuint32                              NodeId;  // F-NUMA node that contains the device
    HSA_DEVICE                             Device;  // device type: GPU or CPU
    HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS  Flags;   // event flags
} HsaDeviceStateChange;

//
// Sub-definitions for various event types: Memory exception
//

// NOTE(review): unlike the other enums in this file, this one has no 0xFFFFFFFF
// size sentinel - confirm whether that is intentional.
typedef enum _HSA_EVENTID_MEMORYFLAGS
{
    HSA_EVENTID_MEMORY_RECOVERABLE   = 0, //access fault, recoverable after page adjustment
    HSA_EVENTID_MEMORY_FATAL_PROCESS = 1, //memory access requires process context destruction, unrecoverable
    HSA_EVENTID_MEMORY_FATAL_VM      = 2, //memory access requires all GPU VA context destruction, unrecoverable
} HSA_EVENTID_MEMORYFLAGS;

// Bit flags describing why a memory access failed (6 flag bits + 26 reserved = 32 bits).
typedef struct _HsaAccessAttributeFailure
{
    unsigned int NotPresent  : 1;  // Page not present or supervisor privilege
    unsigned int ReadOnly    : 1;  // Write access to a read-only page
    unsigned int NoExecute   : 1;  // Execute access to a page marked NX
    unsigned int GpuAccess   : 1;  // Host access only
    unsigned int ECC         : 1;  // ECC failure (if supported by HW)
    unsigned int Imprecise   : 1;  // Can't determine the exact fault address
    unsigned int Reserved    : 26; // must be 0
} HsaAccessAttributeFailure;

// data associated with HSA_EVENTTYPE_MEMORY
typedef struct _HsaMemoryAccessFault
{
    HSAuint32                       NodeId;             // H-NUMA node that contains the device where the memory access occurred
    HSAuint64                       VirtualAddress;     // virtual address this occurred on
    HsaAccessAttributeFailure       Failure;            // failure attribute
    HSA_EVENTID_MEMORYFLAGS         Flags;              // event flags
} HsaMemoryAccessFault;

// Payload of an event: the event type, a union holding the type-specific data,
// and three fields reserved for the KFD/thunk.
typedef struct _HsaEventData
{
    HSA_EVENTTYPE   EventType;      //event type

    union
    {
        // return data associated with HSA_EVENTTYPE_SIGNAL and other events
        HsaSyncVar              SyncVar;

        // data associated with HSA_EVENTTYPE_NODECHANGE
        HsaNodeChange           NodeChangeState;

        // data associated with HSA_EVENTTYPE_DEVICESTATECHANGE
        HsaDeviceStateChange    DeviceState;

        // data associated with HSA_EVENTTYPE_MEMORY
        HsaMemoryAccessFault    MemoryAccessFault;

    } EventData;

    // the following data entries are internal to the KFD & thunk itself.

    HSAuint64       HWData1;                    // internal thunk store for Event data  (OsEventHandle)
    HSAuint64       HWData2;                    // internal thunk store for Event data  (HWAddress)
    HSAuint32       HWData3;                    // internal thunk store for Event data  (HWData)
} HsaEventData;


// Describes an event to be allocated.
typedef struct _HsaEventDescriptor
{
    HSA_EVENTTYPE   EventType;                  // event type to allocate
    HSAuint32       NodeId;                     // H-NUMA node containing GPU device that is event source
    HsaSyncVar      SyncVar;                    // user mode syncvar data; SyncVar.SyncVar.UserDataPtrValue may be NULL
} HsaEventDescriptor;


// An event: its ID together with its data.
typedef struct _HsaEvent
{
    HSA_EVENTID     EventId;
    HsaEventData    EventData;
} HsaEvent;

// Special timeout values. Note the tag is spelled _HsaEventTimeout while the
// typedef name is HsaEventTimeOut (capital 'O'); both are part of the existing API.
typedef enum _HsaEventTimeout
{
    HSA_EVENTTIMEOUT_IMMEDIATE  = 0,
    HSA_EVENTTIMEOUT_INFINITE   = 0xFFFFFFFF
} HsaEventTimeOut;

// A set of 64-bit clock counter values plus the system clock frequency in Hz.
typedef struct _HsaClockCounters
{
    HSAuint64   GPUClockCounter;
    HSAuint64   CPUClockCounter;
    HSAuint64   SystemClockCounter;
    HSAuint64   SystemClockFrequencyHz;
} HsaClockCounters;

// UUID support. When DEFINE_GUID is not already defined, HSA_UUID is declared here
// (32-bit, 16-bit, 16-bit, 8 bytes) and HSA_DEFINE_UUID expands to a
// `static const HSA_UUID` definition, so every translation unit that includes this
// header gets its own copy of each UUID object. When DEFINE_GUID is defined,
// HSA_UUID and HSA_DEFINE_UUID simply forward to GUID and DEFINE_GUID.
#ifndef DEFINE_GUID
typedef struct _HSA_UUID
{
    HSAuint32   Data1;
    HSAuint16   Data2;
    HSAuint16   Data3;
    HSAuint8    Data4[8];
} HSA_UUID;

#define HSA_DEFINE_UUID(name, dw, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
    static const HSA_UUID name = {dw, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
#else
#define HSA_UUID GUID
#define HSA_DEFINE_UUID DEFINE_GUID
#endif

// HSA_UUID that identifies the GPU ColorBuffer (CB) block
// {9ba429c6-af2d-4b38-b349-157271beac6a}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_CB,
0x9ba429c6, 0xaf2d, 0x4b38, 0xb3, 0x49, 0x15, 0x72, 0x71, 0xbe, 0xac, 0x6a);

// HSA_UUID that identifies the GPU (CPF) block
// {2b0ad2b5-1c43-4f46-a7bc-e119411ea6c9}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_CPF,
0x2b0ad2b5, 0x1c43, 0x4f46, 0xa7, 0xbc, 0xe1, 0x19, 0x41, 0x1e, 0xa6, 0xc9);

// HSA_UUID that identifies the GPU (CPG) block
// {590ec94d-20f0-448f-8dff-316c679de7ff}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_CPG,
0x590ec94d, 0x20f0, 0x448f, 0x8d, 0xff, 0x31, 0x6c, 0x67, 0x9d, 0xe7, 0xff);

// HSA_UUID that identifies the GPU (DB) block
// {3d1a47fc-0013-4ed4-8306-822ca0b7a6c2}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_DB,
0x3d1a47fc, 0x0013, 0x4ed4, 0x83, 0x06, 0x82, 0x2c, 0xa0, 0xb7, 0xa6, 0xc2);

// HSA_UUID that identifies the GPU (GDS) block
// {f59276ec-2526-4bf8-8ec0-118f77700dc9}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_GDS,
0xf59276ec, 0x2526, 0x4bf8, 0x8e, 0xc0, 0x11, 0x8f, 0x77, 0x70, 0x0d, 0xc9);

// HSA_UUID that identifies the GPU (GRBM) block
// {8f00933c-c33d-4801-97b7-7007f78573ad}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_GRBM,
0x8f00933c, 0xc33d, 0x4801, 0x97, 0xb7, 0x70, 0x07, 0xf7, 0x85, 0x73, 0xad);

// HSA_UUID that identifies the GPU (GRBMSE) block
// {34ebd8d7-7c8b-4d15-88fa-0e4e4af59ac1}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_GRBMSE,
0x34ebd8d7, 0x7c8b, 0x4d15, 0x88, 0xfa, 0x0e, 0x4e, 0x4a, 0xf5, 0x9a, 0xc1);

// HSA_UUID that identifies the GPU (IA) block
// {34276944-4264-4fcd-9d6e-ae264582ec51}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IA,
0x34276944, 0x4264, 0x4fcd, 0x9d, 0x6e, 0xae, 0x26, 0x45, 0x82, 0xec, 0x51);

// HSA_UUID that identifies the GPU Memory Controller (MC) block
// {13900B57-4956-4D98-81D0-68521937F59C}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC,
0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c);

// HSA_UUID that identifies the GPU (PASC) block
// {b0e7fb5d-0efc-4744-b516-5d23dc1fd56c}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_PASC,
0xb0e7fb5d, 0x0efc, 0x4744, 0xb5, 0x16, 0x5d, 0x23, 0xdc, 0x1f, 0xd5, 0x6c);

// HSA_UUID that identifies the GPU (PASU) block
// {9a152b6a-1fad-45f2-a5bf-f163826bd0cd}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_PASU,
0x9a152b6a, 0x1fad, 0x45f2, 0xa5, 0xbf, 0xf1, 0x63, 0x82, 0x6b, 0xd0, 0xcd);

// HSA_UUID that identifies the GPU (SPI) block
// {eda81044-d62c-47eb-af89-4f6fbf3b38e0}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SPI,
0xeda81044, 0xd62c, 0x47eb, 0xaf, 0x89, 0x4f, 0x6f, 0xbf, 0x3b, 0x38, 0xe0);

// HSA_UUID that identifies the GPU (SRBM) block
// {9f8040e0-6830-4019-acc8-463c9e445b89}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SRBM,
0x9f8040e0, 0x6830, 0x4019, 0xac, 0xc8, 0x46, 0x3c, 0x9e, 0x44, 0x5b, 0x89);

// GUID that identifies the GPU Shader Sequencer (SQ) block
// {B5C396B6-D310-47E4-86FC-5CC3043AF508}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ,
0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8);

// HSA_UUID that identifies the GPU (SX) block
// {bdb8d737-43cc-4162-be52-51cfb847beaf}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SX,
0xbdb8d737, 0x43cc, 0x4162, 0xbe, 0x52, 0x51, 0xcf, 0xb8, 0x47, 0xbe, 0xaf);

// HSA_UUID that identifies the GPU (TA) block
// {c01ee43d-ad92-44b1-8ab9-be5e696ceea7}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TA,
0xc01ee43d, 0xad92, 0x44b1, 0x8a, 0xb9, 0xbe, 0x5e, 0x69, 0x6c, 0xee, 0xa7);

// HSA_UUID that identifies the GPU TextureCache (TCA) block
// {333e393f-e147-4f49-a6d1-60914c7086b0}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCA,
0x333e393f, 0xe147, 0x4f49, 0xa6, 0xd1,0x60, 0x91, 0x4c, 0x70, 0x86, 0xb0);

// HSA_UUID that identifies the GPU TextureCache (TCC) block
// {848ce855-d805-4566-a8ab-73e884cc6bff}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCC,
0x848ce855, 0xd805, 0x4566, 0xa8, 0xab, 0x73, 0xe8, 0x84, 0xcc, 0x6b, 0xff);

// HSA_UUID that identifies the GPU (TCP) block
// {e10a013b-17d4-4bf5-b089-429591059b60}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCP,
0xe10a013b, 0x17d4, 0x4bf5, 0xb0, 0x89, 0x42, 0x95, 0x91, 0x05, 0x9b, 0x60);

// HSA_UUID that identifies the GPU (TCS) block
// {4126245c-4d96-4d1a-8aed-a939d4cc8ec9}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCS,
0x4126245c, 0x4d96, 0x4d1a, 0x8a, 0xed, 0xa9, 0x39, 0xd4, 0xcc, 0x8e, 0xc9);

// HSA_UUID that identifies the GPU (TD) block
// {7d7c0fe4-fe41-4fea-92c9-4544d7706dc6}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TD,
0x7d7c0fe4, 0xfe41, 0x4fea, 0x92, 0xc9, 0x45, 0x44, 0xd7, 0x70, 0x6d, 0xc6);

// HSA_UUID that identifies the GPU (VGT) block
// {0b6a8cb7-7a01-409f-a22c-3014854f1359}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_VGT,
0x0b6a8cb7, 0x7a01, 0x409f, 0xa2, 0x2c, 0x30, 0x14, 0x85, 0x4f, 0x13, 0x59);

// HSA_UUID that identifies the GPU (WD) block
// {0e176789-46ed-4b02-972a-916d2fac244a}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_WD,
0x0e176789, 0x46ed, 0x4b02, 0x97, 0x2a, 0x91, 0x6d, 0x2f, 0xac, 0x24, 0x4a);

// GUID that identifies the IOMMUv2 HW device
// {80969879-B0F6-4BE6-97F6-6A6300F5101D}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IOMMUV2,
0x80969879, 0xb0f6, 0x4be6, 0x97, 0xf6, 0x6a, 0x63, 0x0, 0xf5, 0x10, 0x1d);

// GUID that identifies the KFD
// {EA9B5AE1-6C3F-44B3-8954-DAF07565A90A}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_KERNEL_DRIVER,
0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa);

// Counter access types. HSA_PROFILE_TYPE_NUM takes the next implicit value (4).
typedef enum _HSA_PROFILE_TYPE
{
    HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE = 0, //immediate access counter (KFD access only)
    HSA_PROFILE_TYPE_PRIVILEGED_STREAMING = 1, //streaming counter, HW continuously
                                               //writes to memory on updates (KFD access only)
    HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE    = 2, //user-queue accessible counter
    HSA_PROFILE_TYPE_NONPRIV_STREAMING    = 3, //user-queue accessible counter
    //...
    HSA_PROFILE_TYPE_NUM,

    HSA_PROFILE_TYPE_SIZE                 = 0xFFFFFFFF      // In order to align to 32-bit value
} HSA_PROFILE_TYPE;


// Property flags of a counter, accessible either bit by bit (ui32) or as one
// 32-bit word (Value) through the anonymous union (4 flag bits + 28 reserved).
typedef struct _HsaCounterFlags
{
    union
    {
        struct
        {
            unsigned int  Global       : 1;  // counter is global
                                             // (not tied to VMID/WAVE/CU, ...)
            unsigned int  Resettable   : 1;  // counter can be reset by SW
                                             // (always to 0?)
            unsigned int  ReadOnly     : 1;  // counter is read-only
                                             // (but may be reset, if indicated)
            unsigned int  Stream       : 1;  // counter has streaming capability
                                             // (after trigger, updates buffer)
            unsigned int  Reserved     : 28;
        } ui32;
        HSAuint32      Value;
    };
} HsaCounterFlags;


// Description of a single counter.
typedef struct _HsaCounter
{
    HSA_PROFILE_TYPE Type;              // specifies the counter type
    HSAuint64        CounterId;         // indicates counter register offset
    HSAuint32        CounterSizeInBits; // indicates relevant counter bits
    HSAuint64        CounterMask;       // bitmask for counter value (if applicable)
    HsaCounterFlags  Flags;             // Property flags (see above)
    HSAuint32        BlockIndex;        // identifies block the counter belongs to,
                                        // value may be 0 to NumBlocks
                                        // NOTE(review): presumably an index into
                                        // HsaCounterProperties.Blocks[], i.e. at most
                                        // NumBlocks - 1 - confirm
} HsaCounter;


// Counters of one block. Counters[] is declared with one element but is the
// start of a variable-length array of NumCounters elements.
typedef struct _HsaCounterBlockProperties
{
    HSA_UUID                    BlockId;        // specifies the block location
    HSAuint32                   NumCounters;    // How many counters are available?
                                                // (sizes Counters[] array below)
    HSAuint32                   NumConcurrent;  // How many counter slots are available
                                                // in block?
    HsaCounter                  Counters[1];    // Start of counter array
                                                // (NumCounters elements total)
} HsaCounterBlockProperties;


// All profilable blocks. Blocks[] is declared with one element but is the start
// of a variable-length array of NumBlocks elements.
typedef struct _HsaCounterProperties
{
    HSAuint32                   NumBlocks;      // How many profilable blocks are available?
                                                // (sizes Blocks[] array below)
    HSAuint32                   NumConcurrent;  // How many block slots can be queried
                                                // concurrently by HW?
    HsaCounterBlockProperties   Blocks[1];      // Start of block array
                                                // (NumBlocks elements total)
} HsaCounterProperties;

// Identifier of a trace (64-bit).
typedef HSAuint64   HSATraceId;

typedef struct _HsaPmcTraceRoot
{
    HSAuint64                   TraceBufferMinSizeBytes;// (page aligned)
    HSAuint32                   NumberOfPasses;
    HSATraceId                  TraceId;
} HsaPmcTraceRoot;

// GPU tiling configuration: two arrays of 32-bit entries with their element
// counts, followed by scalar configuration values and reserved padding.
typedef struct _HsaGpuTileConfig
{
    HSAuint32 *TileConfig;
    HSAuint32 *MacroTileConfig;
    HSAuint32 NumTileConfigs;
    HSAuint32 NumMacroTileConfigs;

    HSAuint32 GbAddrConfig;

    HSAuint32 NumBanks;
    HSAuint32 NumRanks;
    /* 9 dwords on 64-bit system */
    HSAuint32 Reserved[7]; /* Round up to 16 dwords for future extension */
} HsaGpuTileConfig;

// Classification of a pointer.
// NOTE(review): unlike most enums in this file, this one has no 0xFFFFFFFF
// size sentinel - confirm whether that is intentional.
typedef enum _HSA_POINTER_TYPE {
    HSA_POINTER_UNKNOWN = 0,
    HSA_POINTER_ALLOCATED = 1,           // Allocated with hsaKmtAllocMemory (except scratch)
    HSA_POINTER_REGISTERED_USER = 2,     // Registered user pointer
    HSA_POINTER_REGISTERED_GRAPHICS = 3  // Registered graphics buffer
                                         // (hsaKmtRegisterGraphicsToNodes)
} HSA_POINTER_TYPE;

// Information about a pointer: its type, location, addresses, size and the
// nodes it is registered/mapped to.
typedef struct _HsaPointerInfo {
    HSA_POINTER_TYPE   Type;             // Pointer type
    HSAuint32          Node;             // Node where the memory is located
    HsaMemFlags        MemFlags;         // Only valid for HSA_POINTER_ALLOCATED
    void               *CPUAddress;      // Start address for CPU access
    HSAuint64          GPUAddress;       // Start address for GPU access
    HSAuint64          SizeInBytes;      // Size in bytes
    HSAuint32          NRegisteredNodes; // Number of nodes the memory is registered to
    HSAuint32          NMappedNodes;     // Number of nodes the memory is mapped to
    const HSAuint32    *RegisteredNodes; // Array of registered nodes
    const HSAuint32    *MappedNodes;     // Array of mapped nodes
    void               *UserData;        // User data associated with the memory
} HsaPointerInfo;

// Shared memory handle: an array of eight 32-bit words (32 bytes).
typedef HSAuint32 HsaSharedMemoryHandle[8];

// A contiguous memory range given by start address and size.
typedef struct _HsaMemoryRange {
    void               *MemoryAddress;   // Pointer to GPU memory
    HSAuint64          SizeInBytes;      // Size of above memory
} HsaMemoryRange;

// Restores the packing saved under the identifier hsakmttypes_h.
// NOTE(review): presumably pairs with a `#pragma pack(push, hsakmttypes_h, 4)`
// near the top of this header - confirm.
#pragma pack(pop, hsakmttypes_h)


#ifdef __cplusplus
}   //extern "C"
#endif

#endif //_HSAKMTTYPES_H_