1 /*
2  * Copyright © 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person
5  * obtaining a copy of this software and associated documentation
6  * files (the "Software"), to deal in the Software without
7  * restriction, including without limitation the rights to use, copy,
8  * modify, merge, publish, distribute, sublicense, and/or sell copies
9  * of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including
13  * the next paragraph) shall be included in all copies or substantial
14  * portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23  * DEALINGS IN THE SOFTWARE.
24  */
25 
26 #ifndef _HSAKMTTYPES_H_
27 #define _HSAKMTTYPES_H_
28 
/*
 * The type definitions and the THUNK API are version specific;
 * the version numbers of this header are defined here.
 */
#define HSAKMT_VERSION_MAJOR    0
#define HSAKMT_VERSION_MINOR    99
32 
33 
34 #ifdef __cplusplus
35 extern "C" {
36 #endif
37 
/*
 * Platform abstraction: selects the calling convention macro (HSAKMTAPI)
 * and defines the fixed-width integer aliases HSAuint8 ... HSAint64 that
 * the definitions in this header are built on.
 */
#if defined(_WIN64) || defined(_WINDOWS) || defined(_WIN32)

    #if defined(_WIN32)
        #define HSAKMTAPI  __stdcall
    #else
        #define HSAKMTAPI
    #endif

    // NOTE(review): HSAint8 is plain 'char' here, whose signedness is
    // implementation-defined, while the non-Windows branch uses int8_t.
    // Left unchanged to keep the type identity callers may depend on.
    typedef unsigned char      HSAuint8;
    typedef char               HSAint8;
    typedef unsigned short     HSAuint16;
    typedef signed short       HSAint16;
    typedef unsigned __int32   HSAuint32;
    typedef signed __int32     HSAint32;
    typedef signed __int64     HSAint64;
    typedef unsigned __int64   HSAuint64;

#elif defined(__linux__) || defined(__FreeBSD__) || defined(__DragonFly__)

#include <stdbool.h>
#include <stdint.h>

    #define HSAKMTAPI

    typedef uint8_t     HSAuint8;
    typedef int8_t      HSAint8;
    typedef uint16_t    HSAuint16;
    typedef int16_t     HSAint16;
    typedef uint32_t    HSAuint32;
    typedef int32_t     HSAint32;
    typedef int64_t     HSAint64;
    typedef uint64_t    HSAuint64;

#else

    // Without this branch an unsupported platform only fails further down
    // with "unknown type name" errors; stop here with a clear message.
    #error "hsakmttypes.h: unsupported platform, HSAKMTAPI and the HSA integer types are not defined"

#endif
72 
73 typedef void*              HSA_HANDLE;
74 typedef HSAuint64          HSA_QUEUEID;
75 
76 // This is included in order to force the alignments to be 4 bytes so that
77 // it avoids extra padding added by the compiler when a 64-bit binary is generated.
78 #pragma pack(push, hsakmttypes_h, 4)
79 
80 //
81 // HSA STATUS codes returned by the KFD Interfaces
82 //
83 
typedef enum _HSAKMT_STATUS
{
    /* ---- generic results ---- */
    HSAKMT_STATUS_SUCCESS                      = 0,  /* operation successful */
    HSAKMT_STATUS_ERROR                        = 1,  /* general error, used when nothing more specific applies */
    HSAKMT_STATUS_DRIVER_MISMATCH              = 2,  /* user mode component incompatible with kernel HSA driver */

    /* ---- parameter validation (reported by KFD) ---- */
    HSAKMT_STATUS_INVALID_PARAMETER            = 3,  /* input parameters invalid */
    HSAKMT_STATUS_INVALID_HANDLE               = 4,  /* handle parameter invalid */
    HSAKMT_STATUS_INVALID_NODE_UNIT            = 5,  /* node or unit parameter invalid */

    /* ---- resources ---- */
    HSAKMT_STATUS_NO_MEMORY                    = 6,  /* no memory available (allocating queues or memory) */
    HSAKMT_STATUS_BUFFER_TOO_SMALL             = 7,  /* a buffer needed to handle a request is too small */

    /* ---- availability of a KFD function ---- */
    HSAKMT_STATUS_NOT_IMPLEMENTED              = 10, /* not implemented for this set of parameters */
    HSAKMT_STATUS_NOT_SUPPORTED                = 11, /* not supported on this node */
    HSAKMT_STATUS_UNAVAILABLE                  = 12, /* not available on this node right now
                                                        (but may be at a later time) */

    /* ---- kernel driver channel ---- */
    HSAKMT_STATUS_KERNEL_IO_CHANNEL_NOT_OPENED = 20, /* KFD driver path not opened */
    HSAKMT_STATUS_KERNEL_COMMUNICATION_ERROR   = 21, /* user-kernel mode communication failure */
    HSAKMT_STATUS_KERNEL_ALREADY_OPENED        = 22, /* KFD driver path already opened */
    HSAKMT_STATUS_HSAMMU_UNAVAILABLE           = 23, /* ATS/PRI 1.1 (Address Translation Services) not available
                                                        (IOMMU driver not installed or not available) */

    /* ---- wait operations ---- */
    HSAKMT_STATUS_WAIT_FAILURE                 = 30, /* the wait operation failed */
    HSAKMT_STATUS_WAIT_TIMEOUT                 = 31, /* the wait operation timed out */

    /* ---- memory registration ---- */
    HSAKMT_STATUS_MEMORY_ALREADY_REGISTERED    = 35, /* memory buffer already registered */
    HSAKMT_STATUS_MEMORY_NOT_REGISTERED        = 36, /* memory buffer not registered */
    HSAKMT_STATUS_MEMORY_ALIGNMENT             = 37, /* memory parameter not aligned */

} HSAKMT_STATUS;
116 
117 //
118 // HSA KFD interface version information. Calling software has to validate that it meets
119 // the minimum interface version as described in the API specification.
120 // All future structures will be extended in a backward compatible fashion.
121 //
122 
123 typedef struct _HsaVersionInfo
124 {
125     HSAuint32    KernelInterfaceMajorVersion;    // supported kernel interface major version
126     HSAuint32    KernelInterfaceMinorVersion;    // supported kernel interface minor version
127 } HsaVersionInfo;
128 
129 //
130 // HSA Topology Discovery Infrastructure structure definitions.
131 // The infrastructure implementation is based on design specified in the Kernel HSA Driver ADD
132 // The discoverable data is retrieved from ACPI structures in the platform infrastructure, as defined
133 // in the "Heterogeneous System Architecture Detail Topology" specification.
134 //
135 // The following structure is returned on a call to hsaKmtAcquireSystemProperties() as output.
136 // When the call is made within a process context, a "snapshot" of the topology information
137 // is taken within the KFD to avoid any changes during the enumeration process.
138 // The Snapshot is released when hsaKmtReleaseSystemProperties() is called
139 // or when the process exits or is terminated.
140 //
141 
142 typedef struct _HsaSystemProperties
143 {
144     HSAuint32    NumNodes;         // the number of "H-NUMA" memory nodes.
145                                    // each node represents a discoverable node of the system
146                                    // All other enumeration is done on a per-node basis
147 
148     HSAuint32    PlatformOem;      // identifies HSA platform, reflects the OEMID in the CRAT
149     HSAuint32    PlatformId;       // HSA platform ID, reflects OEM TableID in the CRAT
150     HSAuint32    PlatformRev;      // HSA platform revision, reflects Platform Table Revision ID
151 } HsaSystemProperties;
152 
153 typedef union
154 {
155     HSAuint32 Value;
156     struct
157     {
158         unsigned int uCode    : 10;  // ucode packet processor version
159         unsigned int Major    :  6;  // GFXIP Major engine version
160         unsigned int Minor    :  8;  // GFXIP Minor engine version
161         unsigned int Stepping :  8;  // GFXIP Stepping info
162     }ui32;
163 } HSA_ENGINE_ID;
164 
165 typedef union
166 {
167     HSAuint32 Value;
168     struct
169     {
170         unsigned int uCodeSDMA: 10; // ucode version SDMA engine
171         unsigned int uCodeRes : 10; // ucode version (reserved)
172         unsigned int Reserved : 12; // Reserved, must be 0
173     };
174 } HSA_ENGINE_VERSION;
175 
176 typedef union
177 {
178     HSAuint32 Value;
179     struct
180     {
181         unsigned int HotPluggable        : 1;    // the node may be removed by some system action
182                                                  // (event will be sent)
183         unsigned int HSAMMUPresent       : 1;    // This node has an ATS/PRI 1.1 compatible
184                                                  // translation agent in the system (e.g. IOMMUv2)
185         unsigned int SharedWithGraphics  : 1;    // this HSA nodes' GPU function is also used for OS primary
186                                                  // graphics render (= UI)
187         unsigned int QueueSizePowerOfTwo : 1;    // This node GPU requires the queue size to be a power of 2 value
188         unsigned int QueueSize32bit      : 1;    // This node GPU requires the queue size to be less than 4GB
189         unsigned int QueueIdleEvent      : 1;    // This node GPU supports notification on Queue Idle
190         unsigned int VALimit             : 1;    // This node GPU has limited VA range for platform
191                                                  // (typical 40bit). Affects shared VM use for 64bit apps
192         unsigned int WatchPointsSupported: 1;	 // Indicates if Watchpoints are available on the node.
193         unsigned int WatchPointsTotalBits: 4;    // ld(Watchpoints) available. To determine the number use 2^value
194 
195         unsigned int DoorbellType        : 2;    // 0: This node has pre-1.0 doorbell characteristic
196                                                  // 1: This node has 1.0 doorbell characteristic
197                                                  // 2,3: reserved for future use
198         unsigned int AQLQueueDoubleMap    : 1;	 // The unit needs a VA “double map”
199         unsigned int Reserved            : 17;
200     } ui32;
201 } HSA_CAPABILITY;
202 
203 
204 //
205 // HSA node properties. This structure is an output parameter of hsaKmtGetNodeProperties()
206 // The application or runtime can use the information herein to size the topology management structures
// Unless there is some very weird setup, there is at most one "GPU" device (with a certain number
// of throughput compute units (= SIMDs)) associated with a H-NUMA node.
209 //
210 
211 #define HSA_PUBLIC_NAME_SIZE        64   // Marketing name string size
212 
213 typedef struct _HsaNodeProperties
214 {
215     HSAuint32       NumCPUCores;       // # of latency (= CPU) cores present on this HSA node.
216                                        // This value is 0 for a HSA node with no such cores,
217                                        // e.g a "discrete HSA GPU"
218     HSAuint32       NumFComputeCores;  // # of HSA throughtput (= GPU) FCompute cores ("SIMD") present in a node.
219                                        // This value is 0 if no FCompute cores are present (e.g. pure "CPU node").
220     HSAuint32       NumMemoryBanks;    // # of discoverable memory bank affinity properties on this "H-NUMA" node.
221     HSAuint32       NumCaches;         // # of discoverable cache affinity properties on this "H-NUMA"  node.
222 
223     HSAuint32       NumIOLinks;        // # of discoverable IO link affinity properties of this node
224                                        // connecting to other nodes.
225 
226     HSAuint32       CComputeIdLo;      // low value of the logical processor ID of the latency (= CPU)
227                                        // cores available on this node
228     HSAuint32       FComputeIdLo;      // low value of the logical processor ID of the throughput (= GPU)
229                                        // units available on this node
230 
231     HSA_CAPABILITY  Capability;        // see above
232 
233     HSAuint32       MaxWavesPerSIMD;   // This identifies the max. number of launched waves per SIMD.
234                                        // If NumFComputeCores is 0, this value is ignored.
235     HSAuint32       LDSSizeInKB;       // Size of Local Data Store in Kilobytes per SIMD Wavefront
236     HSAuint32       GDSSizeInKB;       // Size of Global Data Store in Kilobytes shared across SIMD Wavefronts
237 
238     HSAuint32       WaveFrontSize;     // Number of SIMD cores per wavefront executed, typically 64,
239                                        // may be 32 or a different value for some HSA based architectures
240 
241     HSAuint32       NumShaderBanks;    // Number of Shader Banks or Shader Engines, typical values are 1 or 2
242 
243 
244     HSAuint32       NumArrays;         // Number of SIMD arrays per engine
245     HSAuint32       NumCUPerArray;     // Number of Compute Units (CU) per SIMD array
246     HSAuint32       NumSIMDPerCU;      // Number of SIMD representing a Compute Unit (CU)
247 
248     HSAuint32       MaxSlotsScratchCU; // Number of temp. memory ("scratch") wave slots available to access,
249                                        // may be 0 if HW has no restrictions
250 
251     HSA_ENGINE_ID   EngineId;          // Identifier (rev) of the GPU uEngine or Firmware, may be 0
252 
253     HSAuint16       VendorId;          // GPU vendor id; 0 on latency (= CPU)-only nodes
254     HSAuint16       DeviceId;          // GPU device id; 0 on latency (= CPU)-only nodes
255 
256     HSAuint32       LocationId;        // GPU BDF (Bus/Device/function number) - identifies the device
257                                        // location in the overall system
258     HSAuint64       LocalMemSize;       // Local memory size
259     HSAuint32       MaxEngineClockMhzFCompute;  // maximum engine clocks for CPU and
260     HSAuint32       MaxEngineClockMhzCCompute;  // GPU function, including any boost caopabilities,
261     HSAint32        DrmRenderMinor;             // DRM render device minor device number
262     HSAuint16       MarketingName[HSA_PUBLIC_NAME_SIZE];   // Public name of the "device" on the node (board or APU name).
263                                        // Unicode string
264     HSAuint8        AMDName[HSA_PUBLIC_NAME_SIZE];   //CAL Name of the "device", ASCII
265     HSA_ENGINE_VERSION uCodeEngineVersions;
266     HSAuint8        Reserved[60];
267 } HsaNodeProperties;
268 
269 
// Kinds of memory heap a memory range can belong to.
typedef enum _HSA_HEAPTYPE
{
    HSA_HEAPTYPE_SYSTEM                = 0,
    // CPU "visible" part of GPU device local memory (for discrete GPU)
    HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC   = 1,
    // CPU "invisible" part of GPU device local memory (for discrete GPU).
    // All HSA accessible memory is per definition "CPU visible";
    // "Private memory" is relevant for graphics interop only.
    HSA_HEAPTYPE_FRAME_BUFFER_PRIVATE  = 2,
    // GPU internal memory (GDS)
    HSA_HEAPTYPE_GPU_GDS               = 3,
    // GPU internal memory (LDS)
    HSA_HEAPTYPE_GPU_LDS               = 4,
    // GPU special memory (scratch)
    HSA_HEAPTYPE_GPU_SCRATCH           = 5,
    // sys-memory mapped by device page tables
    HSA_HEAPTYPE_DEVICE_SVM            = 6,

    // Count of the heap types listed above
    HSA_HEAPTYPE_NUMHEAPTYPES,
    HSA_HEAPTYPE_SIZE                  = 0xFFFFFFFF
} HSA_HEAPTYPE;
285 
286 typedef union
287 {
288     HSAuint32 MemoryProperty;
289     struct
290     {
291         unsigned int HotPluggable      : 1; // the memory may be removed by some system action,
292                                             // memory should be used for temporary data
293         unsigned int NonVolatile       : 1; // memory content is preserved across a power-off cycle.
294         unsigned int Reserved          :30;
295     } ui32;
296 } HSA_MEMORYPROPERTY;
297 
298 
299 //
300 // Discoverable HSA Memory properties.
301 // The structure is the output parameter of the hsaKmtGetNodeMemoryProperties() function
302 //
303 
304 typedef struct _HsaMemoryProperties
305 {
306     HSA_HEAPTYPE    HeapType;          // system or frame buffer,
307     union
308     {
309         HSAuint64   SizeInBytes;       // physical memory size of the memory range in bytes
310         struct
311         {
312             HSAuint32 SizeInBytesLow;  // physical memory size of the memory range in bytes (lower 32bit)
313             HSAuint32 SizeInBytesHigh; // physical memory size of the memory range in bytes (higher 32bit)
314         } ui32;
315     };
316     HSA_MEMORYPROPERTY  Flags;         // See definitions above
317 
318     HSAuint32    Width;                // memory width - the number of parallel bits of the memory interface
319     HSAuint32    MemoryClockMax;       // memory clock for the memory, this allows computing the available bandwidth
320                                        // to the memory when needed
321     HSAuint64    VirtualBaseAddress;   // if set to value != 0, indicates the virtual base address of the memory
322                                        // in process virtual space
323 } HsaMemoryProperties;
324 
325 //
326 // Discoverable Cache Properties. (optional).
// The structure is the output parameter of the hsaKmtGetNodeCacheProperties() function
328 // Any of the parameters may be 0 (= not defined)
329 //
330 
// Number of entries in the SiblingMap arrays of this header
#define HSA_CPU_SIBLINGS            256
// All-ones 32-bit processor id value
#define HSA_PROCESSORID_ALL         0xFFFFFFFF
333 
334 typedef union
335 {
336     HSAuint32 Value;
337     struct
338     {
339         unsigned int Data           : 1;
340         unsigned int Instruction    : 1;
341         unsigned int CPU            : 1;
342         unsigned int HSACU          : 1;
343         unsigned int Reserved       :28;
344     } ui32;
345 } HsaCacheType;
346 
347 typedef struct _HaCacheProperties
348 {
349     HSAuint32    ProcessorIdLow;   // Identifies the processor number
350 
351     HSAuint32    CacheLevel;       // Integer representing level: 1, 2, 3, 4, etc
352     HSAuint32    CacheSize;        // Size of the cache
353     HSAuint32    CacheLineSize;    // Cache line size in bytes
354     HSAuint32    CacheLinesPerTag; // Cache lines per Cache Tag
355     HSAuint32    CacheAssociativity; // Cache Associativity
356     HSAuint32    CacheLatency;     // Cache latency in ns
357     HsaCacheType CacheType;
358     HSAuint32    SiblingMap[HSA_CPU_SIBLINGS];
359 } HsaCacheProperties;
360 
361 
362 //
363 // Discoverable CPU Compute Properties. (optional).
364 // The structure is the output parameter of the hsaKmtGetCComputeProperties() function
365 // Any of the parameters may be 0 (= not defined)
366 //
367 
368 typedef struct _HsaCComputeProperties
369 {
370     HSAuint32    SiblingMap[HSA_CPU_SIBLINGS];
371 } HsaCComputeProperties;
372 
373 //
374 // Discoverable IoLink Properties (optional).
375 // The structure is the output parameter of the hsaKmtGetIoLinkProperties() function.
376 // Any of the parameters may be 0 (= not defined)
377 //
378 
// Bus / interconnect type of an IO link.
// Note the two prefixes in use: HSA_IOLINKTYPE_* and HSA_IOLINK_TYPE_*.
typedef enum _HSA_IOLINKTYPE
{
    HSA_IOLINKTYPE_UNDEFINED      = 0,
    HSA_IOLINKTYPE_HYPERTRANSPORT = 1,
    HSA_IOLINKTYPE_PCIEXPRESS     = 2,
    HSA_IOLINKTYPE_AMBA           = 3,
    HSA_IOLINKTYPE_MIPI           = 4,
    HSA_IOLINK_TYPE_QPI_1_1       = 5,
    HSA_IOLINK_TYPE_RESERVED1     = 6,
    HSA_IOLINK_TYPE_RESERVED2     = 7,
    HSA_IOLINK_TYPE_RAPID_IO      = 8,
    HSA_IOLINK_TYPE_INFINIBAND    = 9,
    HSA_IOLINK_TYPE_RESERVED3     = 10,
    HSA_IOLINKTYPE_OTHER          = 11,
    // Count of the link types listed above
    HSA_IOLINKTYPE_NUMIOLINKTYPES,
    HSA_IOLINKTYPE_SIZE           = 0xFFFFFFFF
} HSA_IOLINKTYPE;
395 
396 typedef union
397 {
398     HSAuint32 LinkProperty;
399     struct
400     {
401         unsigned int Override          : 1;  // bus link properties are determined by this structure
402                                              // not by the HSA_IOLINKTYPE. The other flags are valid
403                                              // only if this bit is set to one
404         unsigned int NonCoherent       : 1;  // The link doesn't support coherent transactions
405                                              // memory accesses across must not be set to "host cacheable"!
406         unsigned int NoAtomics32bit    : 1;  // The link doesn't support 32bit-wide atomic transactions
407         unsigned int NoAtomics64bit    : 1;  // The link doesn't support 64bit-wide atomic transactions
408         unsigned int NoPeerToPeerDMA   : 1;  // The link doesn't allow device P2P access
409         unsigned int Reserved          :27;
410     } ui32;
411 } HSA_LINKPROPERTY;
412 
413 
414 typedef struct _HsaIoLinkProperties
415 {
416     HSA_IOLINKTYPE  IoLinkType;      // see above
417     HSAuint32    VersionMajor;       // Bus interface version (optional)
418     HSAuint32    VersionMinor;       // Bus interface version (optional)
419 
420     HSAuint32    NodeFrom;           //
421     HSAuint32    NodeTo;             //
422 
423     HSAuint32    Weight;             // weight factor (derived from CDIT)
424 
425     HSAuint32    MinimumLatency;     // minimum cost of time to transfer (rounded to ns)
426     HSAuint32    MaximumLatency;     // maximum cost of time to transfer (rounded to ns)
427     HSAuint32    MinimumBandwidth;   // minimum interface Bandwidth in MB/s
428     HSAuint32    MaximumBandwidth;   // maximum interface Bandwidth in MB/s
429     HSAuint32    RecTransferSize;    // recommended transfer size to reach maximum bandwidth in Bytes
430     HSA_LINKPROPERTY Flags;          // override flags (may be active for specific platforms)
431 } HsaIoLinkProperties;
432 
433 //
434 // Memory allocation definitions for the KFD HSA interface
435 //
436 
437 typedef struct _HsaMemFlags
438 {
439     union
440     {
441         struct
442         {
443             unsigned int NonPaged    : 1; // default = 0: pageable memory
444             unsigned int CachePolicy : 2; // see HSA_CACHING_TYPE
445             unsigned int ReadOnly    : 1; // default = 0: Read/Write memory
446             unsigned int PageSize    : 2; // see HSA_PAGE_SIZE
447             unsigned int HostAccess  : 1; // default = 0: GPU access only
448             unsigned int NoSubstitute: 1; // default = 0: if specific memory is not available on node (e.g. on
449                                           // discrete GPU local), allocation may fall back to system memory node 0
450                                           // memory (= always available). Otherwise no allocation is possible.
451             unsigned int GDSMemory   : 1; // default = 0: If set, the allocation will occur in GDS heap.
452                                           // HostAccess must be 0, all other flags (except NoSubstitute) should
453                                           // be 0 when setting this entry to 1. GDS allocation may fail due to
454                                           // limited resources. Application code is required to work without
455                                           // any allocated GDS memory using regular memory.
456                                           // Allocation fails on any node without GPU function.
457             unsigned int Scratch     : 1; // default = 0: If set, the allocation will occur in GPU "scratch area".
458                                           // HostAccess must be 0, all other flags (except NoSubstitute) should be 0
459                                           // when setting this entry to 1. Scratch allocation may fail due to limited
460                                           // resources. Application code is required to work without any allocation.
461                                           // Allocation fails on any node without GPU function.
462             unsigned int AtomicAccessFull: 1; // default = 0: If set, the memory will be allocated and mapped to allow
463                                               // atomic ops processing. On AMD APU, this will use the ATC path on system
464                                               // memory, irrespective of the NonPaged flag setting (= if NonPaged is set,
465                                               // the memory is pagelocked but mapped through IOMMUv2 instead of GPUVM).
466                                               // All atomic ops must be supported on this memory.
467             unsigned int AtomicAccessPartial: 1; // default = 0: See above for AtomicAccessFull description, however
468                                                  // focused on AMD discrete GPU that support PCIe atomics; the memory
469                                                  // allocation is mapped to allow for PCIe atomics to operate on system
470                                                  // memory, irrespective of NonPaged set or the presence of an ATC path
471                                                  // in the system. The atomic operations supported are limited to SWAP,
472                                                  // CompareAndSwap (CAS) and FetchAdd (this PCIe op allows both atomic
473                                                  // increment and decrement via 2-complement arithmetic), which are the
474                                                  // only atomic ops directly supported in PCI Express.
475                                                  // On AMD APU, setting this flag will allocate the same type of memory
476                                                  // as AtomicAccessFull, but it will be considered compatible with
477                                                  // discrete GPU atomic operations access.
478             unsigned int ExecuteAccess: 1; // default = 0: Identifies if memory is primarily used for data or accessed
479                                            // for executable code (e.g. queue memory) by the host CPU or the device.
480                                            // Influences the page attribute setting within the allocation
481             unsigned int CoarseGrain : 1;  // default = 0: The memory can be accessed assuming cache
482                                            // coherency maintained by link infrastructure and HSA agents.
483                                            // 1: memory consistency needs to be enforced at
484                                            // synchronization points at dispatch or other software
485                                            // enforced synchronization boundaries.
486             unsigned int AQLQueueMemory: 1; // default = 0; If 1: The caller indicates that the memory will be used as AQL queue memory.
487 					    // The KFD will ensure that the memory returned is allocated in the optimal memory location
488 					    // and optimal alignment requirements
489             unsigned int Reserved    : 17;
490 
491         } ui32;
492         HSAuint32 Value;
493     };
494 } HsaMemFlags;
495 
496 typedef struct _HsaMemMapFlags
497 {
498     union
499     {
500         struct
501         {
502             unsigned int Reserved1      :  1; //
503             unsigned int CachePolicy    :  2; // see HSA_CACHING_TYPE
504             unsigned int ReadOnly       :  1; // memory is not modified while mapped
505             	    	    	    	      // allows migration scale-out
506 	    unsigned int PageSize	    :  2; // see HSA_PAGE_SIZE, hint to use
507 					  // this page size if possible and
508 					  // smaller than default
509 	    unsigned int HostAccess     :  1; // default = 0: GPU access only
510 	    unsigned int Migrate        :  1; // Hint: Allows migration to local mem
511 						  // of mapped GPU(s), instead of mapping
512 						  // physical location
513             unsigned int Probe          :  1;     // default = 0: Indicates that a range
514                                                   // will be mapped by the process soon,
515 						  // but does not initiate a map operation
516 						  // may trigger eviction of nonessential
517 						  // data from the memory, reduces latency
518 						  // “cleanup hint” only, may be ignored
519             unsigned int Reserved       : 23;
520         } ui32;
521         HSAuint32 Value;
522     };
523 } HsaMemMapFlags;
524 
525 typedef struct _HsaGraphicsResourceInfo {
526     void       *MemoryAddress;      // For use in hsaKmtMapMemoryToGPU(Nodes)
527     HSAuint64  SizeInBytes;         // Buffer size
528     const void *Metadata;           // Pointer to metadata owned by Thunk
529     HSAuint32  MetadataSizeInBytes; // Size of metadata
530     HSAuint32  Reserved;            // Reserved for future use, will be set to 0
531 } HsaGraphicsResourceInfo;
532 
// Cache policy values (referenced by the CachePolicy flag fields).
typedef enum _HSA_CACHING_TYPE
{
    HSA_CACHING_CACHED        = 0,
    HSA_CACHING_NONCACHED     = 1,
    HSA_CACHING_WRITECOMBINED = 2,
    HSA_CACHING_RESERVED      = 3,
    // Count of the caching types listed above
    HSA_CACHING_NUM_CACHING,
    HSA_CACHING_SIZE          = 0xFFFFFFFF
} HSA_CACHING_TYPE;
542 
// Page size values (referenced by the PageSize flag fields).
typedef enum _HSA_PAGE_SIZE
{
    HSA_PAGE_SIZE_4KB         = 0,
    // 64KB pages, not generally available in systems
    HSA_PAGE_SIZE_64KB        = 1,
    HSA_PAGE_SIZE_2MB         = 2,
    // 1GB pages, not generally available in systems
    HSA_PAGE_SIZE_1GB         = 3,
} HSA_PAGE_SIZE;
550 
551 
// Device kinds; MAX_HSA_DEVICE is the number of kinds.
typedef enum _HSA_DEVICE
{
    HSA_DEVICE_CPU  = 0,
    HSA_DEVICE_GPU  = 1,
    MAX_HSA_DEVICE  = 2
} HSA_DEVICE;
558 
559 
// Queue priority levels, symmetric around HSA_QUEUE_PRIORITY_NORMAL (0).
// NOTE(review): negative enumerators combined with 0xFFFFFFFF may make
// this enum wider than 32 bits on some compilers - confirm.
typedef enum _HSA_QUEUE_PRIORITY
{
    HSA_QUEUE_PRIORITY_MINIMUM        = -3,
    HSA_QUEUE_PRIORITY_LOW            = -2,
    HSA_QUEUE_PRIORITY_BELOW_NORMAL   = -1,
    HSA_QUEUE_PRIORITY_NORMAL         =  0,
    HSA_QUEUE_PRIORITY_ABOVE_NORMAL   =  1,
    HSA_QUEUE_PRIORITY_HIGH           =  2,
    HSA_QUEUE_PRIORITY_MAXIMUM        =  3,
    // Implicit value: one past HSA_QUEUE_PRIORITY_MAXIMUM
    HSA_QUEUE_PRIORITY_NUM_PRIORITY,
    HSA_QUEUE_PRIORITY_SIZE           = 0xFFFFFFFF
} HSA_QUEUE_PRIORITY;
572 
// Queue types.
typedef enum _HSA_QUEUE_TYPE
{
    // AMD PM4 compatible Compute Queue
    HSA_QUEUE_COMPUTE            = 1,
    // SDMA Queue, used for data transport and format conversion
    // (e.g. (de-)tiling, etc).
    HSA_QUEUE_SDMA               = 2,
    // reserved, for HSA multimedia decode queue
    HSA_QUEUE_MULTIMEDIA_DECODE  = 3,
    // reserved, for HSA multimedia encode queue
    HSA_QUEUE_MULTIMEDIA_ENCODE  = 4,

    // The *_OS values indicate a queue type permitted to reference OS
    // graphics resources through the interoperation API. See [5] "HSA
    // Graphics Interoperation specification" for more details on use of
    // such resources.

    // AMD PM4 compatible Compute Queue
    HSA_QUEUE_COMPUTE_OS           = 11,
    // SDMA Queue, used for data transport and format conversion
    // (e.g. (de-)tiling, etc).
    HSA_QUEUE_SDMA_OS              = 12,
    // reserved, for HSA multimedia decode queue
    HSA_QUEUE_MULTIMEDIA_DECODE_OS = 13,
    // reserved, for HSA multimedia encode queue
    HSA_QUEUE_MULTIMEDIA_ENCODE_OS = 14,

    // HSA AQL packet compatible Compute Queue
    HSA_QUEUE_COMPUTE_AQL          = 21,
    // HSA AQL packet compatible DMA Queue
    HSA_QUEUE_DMA_AQL              = 22,

    // more types in the future

    // aligns to 32bit enum
    HSA_QUEUE_TYPE_SIZE            = 0xFFFFFFFF
} HSA_QUEUE_TYPE;
596 
// Per-queue information record: HW error state, extended queue type, CU mask,
// user context save area and control stack usage.
// Note: unlike most structs in this header, this one has no struct tag.
typedef struct
{
	HSAuint32 QueueDetailError;	// HW specific queue error state
	HSAuint32 QueueTypeExtended;	// HW specific queue type info.
					// 0 = no information
	HSAuint32 NumCUAssigned;	// size of the *CUMaskInfo bit array; a multiple
					// of 32, 0 = no information
	HSAuint32* CUMaskInfo;		// runtime/system CU assignment for realtime
					// queue & reserved CU priority. Pointer to a
					// bit-array, each bit represents one CU.
					// NULL = no information
	HSAuint32* UserContextSaveArea;	// reference to user space context save area
	HSAuint64 SaveAreaSizeInBytes;	// must be 4-byte aligned
	HSAuint32* ControlStackTop;	// pointer to the top of the control stack (TOS)
	HSAuint64 ControlStackUsedInBytes; // must be 4-byte aligned
	HSAuint64 Reserved1;		// reserved. NOTE(review): the previous comment said
					// "runtime/system CU assignment", which looks
					// copy-pasted from CUMaskInfo - confirm
	HSAuint64 Reserved2;		// reserved (same note as Reserved1)
} HsaQueueInfo;
615 
/**
 * Queue ID plus the doorbell, write-pointer and read-pointer locations of a
 * queue.
 *
 * Each of the three anonymous unions overlays the same storage as a pointer
 * to a 32-bit value, a pointer to a 64-bit value (the "_aql" member,
 * presumably for AQL queues - confirm) and a raw 64-bit integer.
 */
typedef struct _HsaQueueResource
{
    HSA_QUEUEID     QueueId;    /** queue ID */
    /** Doorbell address used to notify HW of a new dispatch */
    union
    {
        HSAuint32*  Queue_DoorBell;
        HSAuint64*  Queue_DoorBell_aql;
        HSAuint64   QueueDoorBell;
    };

    /** Virtual address used to notify HW of the queue write pointer value */
    union
    {
        HSAuint32*  Queue_write_ptr;
        HSAuint64*  Queue_write_ptr_aql;
        HSAuint64   QueueWptrValue;
    };

    /** Virtual address updated by HW to indicate the current read location */
    union
    {
        HSAuint32*  Queue_read_ptr;
        HSAuint64*  Queue_read_ptr_aql;
        HSAuint64   QueueRptrValue;
    };

} HsaQueueResource;
644 
645 
//TEMPORARY structure definition - to be used only on the "Trinity + Southern Islands" platform.
//Describes a user-mode (UM) mapped compute ring: its VMID, address and size.
typedef struct _HsaQueueReport
{
    HSAuint32     VMID;         //Required on SI (Southern Islands) to dispatch an IB in the primary ring
    void*         QueueAddress; //virtual address of the UM mapped compute ring
    HSAuint64     QueueSize;    //size of the UM mapped compute ring
} HsaQueueReport;
653 
654 
655 
//Debug operations that can be applied to a wavefront. Values start at 1.
typedef enum _HSA_DBG_WAVEOP
{
    HSA_DBG_WAVEOP_HALT        = 1, //Halts a wavefront
    HSA_DBG_WAVEOP_RESUME      = 2, //Resumes a wavefront
    HSA_DBG_WAVEOP_KILL        = 3, //Kills a wavefront
    HSA_DBG_WAVEOP_DEBUG       = 4, //Causes wavefront to enter debug mode
    HSA_DBG_WAVEOP_TRAP        = 5, //Causes wavefront to take a trap
    HSA_DBG_NUM_WAVEOP         = 5, //Number of operations above (equal to the last operation's value)
    HSA_DBG_MAX_WAVEOP         = 0xFFFFFFFF //Sentinel; presumably forces a 32-bit enum like the *_SIZE members elsewhere - confirm
} HSA_DBG_WAVEOP;
666 
//Selects which wavefronts a debug command is sent to.
//Value 1 is intentionally not defined here (see the comment inside the enum).
typedef enum _HSA_DBG_WAVEMODE
{
    HSA_DBG_WAVEMODE_SINGLE               = 0,  //send command to a single wave
    //Broadcast to all wavefronts of all processes is not supported for HSA user mode
    HSA_DBG_WAVEMODE_BROADCAST_PROCESS    = 2,  //send to waves within current process
    HSA_DBG_WAVEMODE_BROADCAST_PROCESS_CU = 3,  //send to waves within current process on CU
    HSA_DBG_NUM_WAVEMODE                  = 3,  //three modes are defined above (this also equals the highest mode value)
    HSA_DBG_MAX_WAVEMODE                  = 0xFFFFFFFF //Sentinel; presumably forces a 32-bit enum - confirm
} HSA_DBG_WAVEMODE;
676 
677 
//Wave message type.
typedef enum _HSA_DBG_WAVEMSG_TYPE
{
    HSA_DBG_WAVEMSG_AUTO    = 0,
    HSA_DBG_WAVEMSG_USER    = 1,
    HSA_DBG_WAVEMSG_ERROR   = 2,
    HSA_DBG_NUM_WAVEMSG,                    //implicit value 3: number of message types above
    HSA_DBG_MAX_WAVEMSG     = 0xFFFFFFFF    //Sentinel; presumably forces a 32-bit enum - confirm
} HSA_DBG_WAVEMSG_TYPE;
686 
//Kind of memory operation a debug watch applies to.
typedef enum _HSA_DBG_WATCH_MODE
{
    HSA_DBG_WATCH_READ        = 0, //Read operations only
    HSA_DBG_WATCH_NONREAD     = 1, //Write or Atomic operations only
    HSA_DBG_WATCH_ATOMIC      = 2, //Atomic Operations only
    HSA_DBG_WATCH_ALL         = 3, //Read, Write or Atomic operations
    HSA_DBG_WATCH_NUM,             //implicit value 4: number of watch modes above
    HSA_DBG_WATCH_SIZE        = 0xFFFFFFFF //Sentinel; presumably forces a 32-bit enum - confirm
} HSA_DBG_WATCH_MODE;
696 
697 
//Wave message payload in the "Gen2" layout: two 32-bit words.
//This structure is hardware specific and may change in the future.
typedef struct _HsaDbgWaveMsgAMDGen2
{
    HSAuint32      Value;       //message value; encoding is not described in this header
    HSAuint32      Reserved2;   //reserved

} HsaDbgWaveMsgAMDGen2;
705 
//Union of the hardware-generation specific wave message layouts.
//Only the Gen2 layout exists today.
typedef union _HsaDbgWaveMessageAMD
{
    HsaDbgWaveMsgAMDGen2    WaveMsgInfoGen2;
    //placeholder for a future HsaDbgWaveMsgAMDGen3 member
} HsaDbgWaveMessageAMD;
711 
//A wave message together with a pointer to its associated host-accessible data.
typedef struct _HsaDbgWaveMessage
{
    void*                   MemoryVA;         // ptr to associated host-accessible data
    HsaDbgWaveMessageAMD    DbgWaveMsg;       // hardware specific message payload
} HsaDbgWaveMessage;
717 
718 
//
// HSA sync primitive, Event and HW Exception notification API definitions.
// The API functions allow the runtime to define a so-called sync-primitive, a SW object
// combining a user-mode provided "syncvar" and a scheduler event that can be signaled
// through a defined GPU interrupt. A syncvar is a process virtual memory location of
// a certain size that can be accessed by CPU and GPU shader code within the process to set
// and query the content within that memory. The definition of the content is determined by
// the HSA runtime and potentially GPU shader code interfacing with the HSA runtime.
// The syncvar values may commonly be written through a PM4 WRITE_DATA packet in the
// user mode instruction stream.
// The OS scheduler event is typically associated with and signaled by an interrupt issued by
// the GPU, but other HSA system interrupt conditions from other HW (e.g. IOMMUv2) may be
// surfaced by the KFD through this mechanism, too.
//
733 
// Event types (these are the new definitions for events).
// The type selects which member of HsaEventData.EventData carries the event payload.
typedef enum _HSA_EVENTTYPE
{
    HSA_EVENTTYPE_SIGNAL                     = 0, //user-mode generated GPU signal
    HSA_EVENTTYPE_NODECHANGE                 = 1, //HSA node change (attach/detach)
    HSA_EVENTTYPE_DEVICESTATECHANGE          = 2, //HSA device state change (start/stop)
    HSA_EVENTTYPE_HW_EXCEPTION               = 3, //GPU shader exception event
    HSA_EVENTTYPE_SYSTEM_EVENT               = 4, //GPU SYSCALL with parameter info
    HSA_EVENTTYPE_DEBUG_EVENT                = 5, //GPU signal for debugging
    HSA_EVENTTYPE_PROFILE_EVENT              = 6, //GPU signal for profiling
    HSA_EVENTTYPE_QUEUE_EVENT                = 7, //GPU signal queue idle state (EOP pm4)
    HSA_EVENTTYPE_MEMORY                     = 8, //GPU signal for memory access faults and memory subsystem issues
    //...
    HSA_EVENTTYPE_MAXID,                          //implicit value 9: one past the last defined event type
    HSA_EVENTTYPE_TYPE_SIZE                  = 0xFFFFFFFF //Sentinel; presumably forces a 32-bit enum - confirm
} HSA_EVENTTYPE;
750 
// 32-bit event identifier (type of HsaEvent.EventId).
typedef HSAuint32  HSA_EVENTID;
752 
753 //
754 // Subdefinitions for various event types: Syncvar
755 //
756 
// Location and size of a user-mode syncvar.
// The address is available either as a pointer or as a 64-bit integer that
// overlays the same storage.
typedef struct _HsaSyncVar
{
    union
    {
        void*       UserData;           //pointer to user mode data
        HSAuint64   UserDataPtrValue;   //same address as a 64-bit value (64-bit compatibility)
    } SyncVar;
    HSAuint64       SyncVarSize;        //size of the syncvar; presumably in bytes - confirm
} HsaSyncVar;
766 
767 //
768 // Subdefinitions for various event types: NodeChange
769 //
770 
// Direction of a node change event (see HsaNodeChange.Flags).
typedef enum _HSA_EVENTTYPE_NODECHANGE_FLAGS
{
    HSA_EVENTTYPE_NODECHANGE_ADD     = 0,           // node added
    HSA_EVENTTYPE_NODECHANGE_REMOVE  = 1,           // node removed
    HSA_EVENTTYPE_NODECHANGE_SIZE    = 0xFFFFFFFF   // Sentinel; presumably forces a 32-bit enum - confirm
} HSA_EVENTTYPE_NODECHANGE_FLAGS;
777 
// Payload of a node change event (HsaEventData.EventData.NodeChangeState).
typedef struct _HsaNodeChange
{
    HSA_EVENTTYPE_NODECHANGE_FLAGS Flags;   // HSA node added/removed on the platform
} HsaNodeChange;
782 
783 //
784 // Sub-definitions for various event types: DeviceStateChange
785 //
786 
// Direction of a device state change event (see HsaDeviceStateChange.Flags).
// Note: the enumerators are spelled ..._DEVICESTATUSCHANGE_... while the type
// is spelled ..._DEVICESTATECHANGE_...; the names are part of the interface.
typedef enum _HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS
{
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_START     = 0, //device started (and available)
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_STOP      = 1, //device stopped (i.e. unavailable)
    HSA_EVENTTYPE_DEVICESTATUSCHANGE_SIZE      = 0xFFFFFFFF //Sentinel; presumably forces a 32-bit enum - confirm
} HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS;
793 
// Payload of a device state change event (HsaEventData.EventData.DeviceState).
typedef struct _HsaDeviceStateChange
{
    HSAuint32                           NodeId;     // H-NUMA node that contains the device
    HSA_DEVICE                          Device;     // device type: GPU or CPU
    HSA_EVENTTYPE_DEVICESTATECHANGE_FLAGS Flags;    // event flags (device started/stopped)
} HsaDeviceStateChange;
800 
801 //
802 // Sub-definitions for various event types: Memory exception
803 //
804 
// Severity of a memory access fault (type of HsaMemoryAccessFault.Flags).
// The trailing *_SIZE member follows the convention of the other enums in this
// header: it pins the enum to 32 bits so that the layout of structures
// embedding it does not depend on compiler enum-size options.
typedef enum _HSA_EVENTID_MEMORYFLAGS
{
    HSA_EVENTID_MEMORY_RECOVERABLE           = 0, //access fault, recoverable after page adjustment
    HSA_EVENTID_MEMORY_FATAL_PROCESS         = 1, //memory access requires process context destruction, unrecoverable
    HSA_EVENTID_MEMORY_FATAL_VM              = 2, //memory access requires all GPU VA context destruction, unrecoverable
    HSA_EVENTID_MEMORY_SIZE                  = 0xFFFFFFFF //aligns to 32bit enum; not a valid flag value
} HSA_EVENTID_MEMORYFLAGS;
811 
// Bit flags describing why a memory access failed. The bit-fields total
// 32 bits (6 single-bit flags + 26 reserved bits).
typedef struct _HsaAccessAttributeFailure
{
    unsigned int NotPresent  : 1;  // Page not present or supervisor privilege
    unsigned int ReadOnly    : 1;  // Write access to a read-only page
    unsigned int NoExecute   : 1;  // Execute access to a page marked NX
    unsigned int GpuAccess   : 1;  // Host access only
    unsigned int ECC         : 1;  // ECC failure (if supported by HW)
    unsigned int Imprecise   : 1;  // Can't determine the exact fault address
    unsigned int Reserved    : 26; // must be 0
} HsaAccessAttributeFailure;
822 
// Data associated with HSA_EVENTTYPE_MEMORY events
// (HsaEventData.EventData.MemoryAccessFault).
typedef struct _HsaMemoryAccessFault
{
    HSAuint32                       NodeId;             // H-NUMA node that contains the device where the memory access occurred
    HSAuint64                       VirtualAddress;     // virtual address the fault occurred on
    HsaAccessAttributeFailure       Failure;            // failure attribute bits
    HSA_EVENTID_MEMORYFLAGS         Flags;              // event flags (fault severity)
} HsaMemoryAccessFault;
831 
// Event payload: the event type, a union holding the type-specific data, and
// three fields reserved for internal use by the KFD and the thunk.
typedef struct _HsaEventData
{
    HSA_EVENTTYPE   EventType;      //event type; determines which EventData member applies

    union
    {
        // return data associated with HSA_EVENTTYPE_SIGNAL and other events
        HsaSyncVar              SyncVar;

        // data associated with HSA_EVENTTYPE_NODECHANGE
        HsaNodeChange           NodeChangeState;

        // data associated with HSA_EVENTTYPE_DEVICESTATECHANGE
        HsaDeviceStateChange    DeviceState;

        // data associated with HSA_EVENTTYPE_MEMORY
        HsaMemoryAccessFault    MemoryAccessFault;

    } EventData;

    // the following data entries are internal to the KFD & thunk itself.

    HSAuint64       HWData1;                    // internal thunk store for Event data  (OsEventHandle)
    HSAuint64       HWData2;                    // internal thunk store for Event data  (HWAddress)
    HSAuint32       HWData3;                    // internal thunk store for Event data  (HWData)
} HsaEventData;
858 
859 
// Describes an event to allocate: its type, source node and optional syncvar.
typedef struct _HsaEventDescriptor
{
    HSA_EVENTTYPE   EventType;                  // event type to allocate
    HSAuint32       NodeId;                     // H-NUMA node containing GPU device that is event source
    HsaSyncVar      SyncVar;                    // user mode syncvar data; SyncVar.SyncVar.UserDataPtrValue may be NULL
} HsaEventDescriptor;
866 
867 
// An event: its identifier together with its payload.
typedef struct _HsaEvent
{
    HSA_EVENTID     EventId;        // event identifier
    HsaEventData    EventData;      // event type, type-specific data and internal fields
} HsaEvent;
873 
// Special event timeout values.
// Note: the enum tag is spelled _HsaEventTimeout while the typedef is
// HsaEventTimeOut; both spellings are part of the interface.
typedef enum _HsaEventTimeout
{
    HSA_EVENTTIMEOUT_IMMEDIATE  = 0,            // zero timeout
    HSA_EVENTTIMEOUT_INFINITE   = 0xFFFFFFFF    // largest 32-bit value
} HsaEventTimeOut;
879 
// Set of 64-bit clock counter values plus the system clock frequency.
typedef struct _HsaClockCounters
{
    HSAuint64   GPUClockCounter;            // GPU clock counter value
    HSAuint64   CPUClockCounter;            // CPU clock counter value
    HSAuint64   SystemClockCounter;         // system clock counter value
    HSAuint64   SystemClockFrequencyHz;     // system clock frequency, in Hz
} HsaClockCounters;
887 
// UUID type and definition macro.
// When DEFINE_GUID is not already defined, HSA_UUID is declared here as a
// 16-byte structure (32-bit + 16-bit + 16-bit + 8 bytes) and HSA_DEFINE_UUID
// expands to a "static const" HSA_UUID definition, so every translation unit
// that includes this header gets its own copy of each UUID constant.
// When DEFINE_GUID is defined, HSA_UUID and HSA_DEFINE_UUID simply map to
// GUID and DEFINE_GUID.
#ifndef DEFINE_GUID
typedef struct _HSA_UUID
{
    HSAuint32   Data1;
    HSAuint16   Data2;
    HSAuint16   Data3;
    HSAuint8    Data4[8];
} HSA_UUID;

#define HSA_DEFINE_UUID(name, dw, w1, w2, b1, b2, b3, b4, b5, b6, b7, b8) \
    static const HSA_UUID name = {dw, w1, w2, {b1, b2, b3, b4, b5, b6, b7, b8}}
#else
#define HSA_UUID GUID
#define HSA_DEFINE_UUID DEFINE_GUID
#endif
903 
// HSA_UUID that identifies the GPU ColorBuffer (CB) block
// {9ba429c6-af2d-4b38-b349-157271beac6a}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_CB,
0x9ba429c6, 0xaf2d, 0x4b38, 0xb3, 0x49, 0x15, 0x72, 0x71, 0xbe, 0xac, 0x6a);

// HSA_UUID that identifies the GPU (CPF) block
// {2b0ad2b5-1c43-4f46-a7bc-e119411ea6c9}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_CPF,
0x2b0ad2b5, 0x1c43, 0x4f46, 0xa7, 0xbc, 0xe1, 0x19, 0x41, 0x1e, 0xa6, 0xc9);

// HSA_UUID that identifies the GPU (CPG) block
// {590ec94d-20f0-448f-8dff-316c679de7ff}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_CPG,
0x590ec94d, 0x20f0, 0x448f, 0x8d, 0xff, 0x31, 0x6c, 0x67, 0x9d, 0xe7, 0xff);

// HSA_UUID that identifies the GPU (DB) block
// {3d1a47fc-0013-4ed4-8306-822ca0b7a6c2}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_DB,
0x3d1a47fc, 0x0013, 0x4ed4, 0x83, 0x06, 0x82, 0x2c, 0xa0, 0xb7, 0xa6, 0xc2);

// HSA_UUID that identifies the GPU (GDS) block
// {f59276ec-2526-4bf8-8ec0-118f77700dc9}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_GDS,
0xf59276ec, 0x2526, 0x4bf8, 0x8e, 0xc0, 0x11, 0x8f, 0x77, 0x70, 0x0d, 0xc9);

// HSA_UUID that identifies the GPU (GRBM) block
// {8f00933c-c33d-4801-97b7-7007f78573ad}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_GRBM,
0x8f00933c, 0xc33d, 0x4801, 0x97, 0xb7, 0x70, 0x07, 0xf7, 0x85, 0x73, 0xad);

// HSA_UUID that identifies the GPU (GRBMSE) block
// {34ebd8d7-7c8b-4d15-88fa-0e4e4af59ac1}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_GRBMSE,
0x34ebd8d7, 0x7c8b, 0x4d15, 0x88, 0xfa, 0x0e, 0x4e, 0x4a, 0xf5, 0x9a, 0xc1);

// HSA_UUID that identifies the GPU (IA) block
// {34276944-4264-4fcd-9d6e-ae264582ec51}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IA,
0x34276944, 0x4264, 0x4fcd, 0x9d, 0x6e, 0xae, 0x26, 0x45, 0x82, 0xec, 0x51);

// HSA_UUID that identifies the GPU Memory Controller (MC) block
// {13900B57-4956-4D98-81D0-68521937F59C}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_MC,
0x13900b57, 0x4956, 0x4d98, 0x81, 0xd0, 0x68, 0x52, 0x19, 0x37, 0xf5, 0x9c);

// HSA_UUID that identifies the GPU (PASC) block
// {b0e7fb5d-0efc-4744-b516-5d23dc1fd56c}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_PASC,
0xb0e7fb5d, 0x0efc, 0x4744, 0xb5, 0x16, 0x5d, 0x23, 0xdc, 0x1f, 0xd5, 0x6c);

// HSA_UUID that identifies the GPU (PASU) block
// {9a152b6a-1fad-45f2-a5bf-f163826bd0cd}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_PASU,
0x9a152b6a, 0x1fad, 0x45f2, 0xa5, 0xbf, 0xf1, 0x63, 0x82, 0x6b, 0xd0, 0xcd);

// HSA_UUID that identifies the GPU (SPI) block
// {eda81044-d62c-47eb-af89-4f6fbf3b38e0}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SPI,
0xeda81044, 0xd62c, 0x47eb, 0xaf, 0x89, 0x4f, 0x6f, 0xbf, 0x3b, 0x38, 0xe0);

// HSA_UUID that identifies the GPU (SRBM) block
// {9f8040e0-6830-4019-acc8-463c9e445b89}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SRBM,
0x9f8040e0, 0x6830, 0x4019, 0xac, 0xc8, 0x46, 0x3c, 0x9e, 0x44, 0x5b, 0x89);

// HSA_UUID that identifies the GPU Shader Sequencer (SQ) block
// {B5C396B6-D310-47E4-86FC-5CC3043AF508}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SQ,
0xb5c396b6, 0xd310, 0x47e4, 0x86, 0xfc, 0x5c, 0xc3, 0x4, 0x3a, 0xf5, 0x8);

// HSA_UUID that identifies the GPU (SX) block
// {bdb8d737-43cc-4162-be52-51cfb847beaf}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_SX,
0xbdb8d737, 0x43cc, 0x4162, 0xbe, 0x52, 0x51, 0xcf, 0xb8, 0x47, 0xbe, 0xaf);

// HSA_UUID that identifies the GPU (TA) block
// {c01ee43d-ad92-44b1-8ab9-be5e696ceea7}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TA,
0xc01ee43d, 0xad92, 0x44b1, 0x8a, 0xb9, 0xbe, 0x5e, 0x69, 0x6c, 0xee, 0xa7);

// HSA_UUID that identifies the GPU TextureCache (TCA) block
// {333e393f-e147-4f49-a6d1-60914c7086b0}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCA,
0x333e393f, 0xe147, 0x4f49, 0xa6, 0xd1,0x60, 0x91, 0x4c, 0x70, 0x86, 0xb0);

// HSA_UUID that identifies the GPU TextureCache (TCC) block
// {848ce855-d805-4566-a8ab-73e884cc6bff}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCC,
0x848ce855, 0xd805, 0x4566, 0xa8, 0xab, 0x73, 0xe8, 0x84, 0xcc, 0x6b, 0xff);

// HSA_UUID that identifies the GPU (TCP) block
// {e10a013b-17d4-4bf5-b089-429591059b60}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCP,
0xe10a013b, 0x17d4, 0x4bf5, 0xb0, 0x89, 0x42, 0x95, 0x91, 0x05, 0x9b, 0x60);

// HSA_UUID that identifies the GPU (TCS) block
// {4126245c-4d96-4d1a-8aed-a939d4cc8ec9}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TCS,
0x4126245c, 0x4d96, 0x4d1a, 0x8a, 0xed, 0xa9, 0x39, 0xd4, 0xcc, 0x8e, 0xc9);

// HSA_UUID that identifies the GPU (TD) block
// {7d7c0fe4-fe41-4fea-92c9-4544d7706dc6}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_TD,
0x7d7c0fe4, 0xfe41, 0x4fea, 0x92, 0xc9, 0x45, 0x44, 0xd7, 0x70, 0x6d, 0xc6);

// HSA_UUID that identifies the GPU (VGT) block
// {0b6a8cb7-7a01-409f-a22c-3014854f1359}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_VGT,
0x0b6a8cb7, 0x7a01, 0x409f, 0xa2, 0x2c, 0x30, 0x14, 0x85, 0x4f, 0x13, 0x59);

// HSA_UUID that identifies the GPU (WD) block
// {0e176789-46ed-4b02-972a-916d2fac244a}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_WD,
0x0e176789, 0x46ed, 0x4b02, 0x97, 0x2a, 0x91, 0x6d, 0x2f, 0xac, 0x24, 0x4a);

// HSA_UUID that identifies the IOMMUv2 HW device
// {80969879-B0F6-4BE6-97F6-6A6300F5101D}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_IOMMUV2,
0x80969879, 0xb0f6, 0x4be6, 0x97, 0xf6, 0x6a, 0x63, 0x0, 0xf5, 0x10, 0x1d);

// HSA_UUID that identifies the KFD
// {EA9B5AE1-6C3F-44B3-8954-DAF07565A90A}
HSA_DEFINE_UUID(HSA_PROFILEBLOCK_AMD_KERNEL_DRIVER,
0xea9b5ae1, 0x6c3f, 0x44b3, 0x89, 0x54, 0xda, 0xf0, 0x75, 0x65, 0xa9, 0xa);
1028 
// Counter access type: privileged (KFD access only) or user-queue accessible,
// and immediate or streaming.
typedef enum _HSA_PROFILE_TYPE
{
    HSA_PROFILE_TYPE_PRIVILEGED_IMMEDIATE = 0, //immediate access counter (KFD access only)
    HSA_PROFILE_TYPE_PRIVILEGED_STREAMING = 1, //streaming counter, HW continuously
                                               //writes to memory on updates (KFD access only)
    HSA_PROFILE_TYPE_NONPRIV_IMMEDIATE    = 2, //user-queue accessible counter
    HSA_PROFILE_TYPE_NONPRIV_STREAMING    = 3, //user-queue accessible counter
    //...
    HSA_PROFILE_TYPE_NUM,                      //implicit value 4: number of profile types above

    HSA_PROFILE_TYPE_SIZE                 = 0xFFFFFFFF      // In order to align to 32-bit value
} HSA_PROFILE_TYPE;
1041 
1042 
// Counter property flags. The anonymous union lets the flags be accessed
// either as individual bit-fields (ui32, 4 flag bits + 28 reserved bits) or
// as one 32-bit word (Value).
typedef struct _HsaCounterFlags
{
    union
    {
        struct
        {
            unsigned int  Global       : 1;  // counter is global
                                             // (not tied to VMID/WAVE/CU, ...)
            unsigned int  Resettable   : 1;  // counter can be reset by SW
                                             // (always to 0?)
            unsigned int  ReadOnly     : 1;  // counter is read-only
                                             // (but may be reset, if indicated)
            unsigned int  Stream       : 1;  // counter has streaming capability
                                             // (after trigger, updates buffer)
            unsigned int  Reserved     : 28; // reserved bits
        } ui32;
        HSAuint32      Value;                // all flag bits as a single 32-bit value
    };
} HsaCounterFlags;
1062 
1063 
// Description of a single counter (element type of
// HsaCounterBlockProperties.Counters[]).
typedef struct _HsaCounter
{
    HSA_PROFILE_TYPE Type;              // specifies the counter type
    HSAuint64        CounterId;         // indicates counter register offset
    HSAuint32        CounterSizeInBits; // indicates relevant counter bits
    HSAuint64        CounterMask;       // bitmask for counter value (if applicable)
    HsaCounterFlags  Flags;             // property flags (see HsaCounterFlags)
    HSAuint32        BlockIndex;        // identifies block the counter belongs to,
                                        // value may be 0 to NumBlocks
                                        // NOTE(review): upper bound is presumably
                                        // NumBlocks - 1 - confirm
} HsaCounter;
1074 
1075 
// Properties of one counter block, followed by its counters.
// Counters[] is declared with one element but holds NumCounters elements in
// total, so the structure is variable-sized.
typedef struct _HsaCounterBlockProperties
{
    HSA_UUID                    BlockId;        // specifies the block location
    HSAuint32                   NumCounters;    // How many counters are available?
                                                // (sizes Counters[] array below)
    HSAuint32                   NumConcurrent;  // How many counter slots are available
                                                // in block?
    HsaCounter                  Counters[1];    // Start of counter array
                                                // (NumCounters elements total)
} HsaCounterBlockProperties;
1086 
1087 
// Top-level counter description: the list of counter blocks.
// Blocks[] is declared with one element but holds NumBlocks elements in
// total, so the structure is variable-sized.
typedef struct _HsaCounterProperties
{
    HSAuint32                   NumBlocks;      // How many profilable blocks are available?
                                                // (sizes Blocks[] array below)
    HSAuint32                   NumConcurrent;  // How many block slots can be queried
                                                // concurrently by HW?
    HsaCounterBlockProperties   Blocks[1];      // Start of block array
                                                // (NumBlocks elements total)
} HsaCounterProperties;
1097 
// 64-bit trace identifier (type of HsaPmcTraceRoot.TraceId).
typedef HSAuint64   HSATraceId;
1099 
// Root information of a PMC trace: minimum buffer size, pass count and trace ID.
typedef struct _HsaPmcTraceRoot
{
    HSAuint64                   TraceBufferMinSizeBytes;// minimum trace buffer size in bytes (page aligned)
    HSAuint32                   NumberOfPasses;         // number of passes
    HSATraceId                  TraceId;                // identifier of the trace
} HsaPmcTraceRoot;
1106 
// GPU tiling configuration.
// TileConfig and MacroTileConfig point to arrays whose element counts are
// presumably NumTileConfigs and NumMacroTileConfigs - confirm.
// With 64-bit pointers the named members occupy 9 dwords; Reserved[] pads the
// structure to 16 dwords.
typedef struct _HsaGpuTileConfig
{
    HSAuint32 *TileConfig;
    HSAuint32 *MacroTileConfig;
    HSAuint32 NumTileConfigs;
    HSAuint32 NumMacroTileConfigs;

    HSAuint32 GbAddrConfig;

    HSAuint32 NumBanks;
    HSAuint32 NumRanks;
    /* 9 dwords on 64-bit system */
    HSAuint32 Reserved[7]; /* Round up to 16 dwords for future extension */
} HsaGpuTileConfig;
1121 
// Classification of a pointer (type of HsaPointerInfo.Type).
// The trailing *_SIZE member follows the convention of the other enums in this
// header: it pins the enum to 32 bits so that the layout of structures
// embedding it does not depend on compiler enum-size options.
typedef enum _HSA_POINTER_TYPE {
    HSA_POINTER_UNKNOWN = 0,
    HSA_POINTER_ALLOCATED = 1,           // Allocated with hsaKmtAllocMemory (except scratch)
    HSA_POINTER_REGISTERED_USER = 2,     // Registered user pointer
    HSA_POINTER_REGISTERED_GRAPHICS = 3, // Registered graphics buffer
                                         // (hsaKmtRegisterGraphicsToNodes)
    HSA_POINTER_TYPE_SIZE = 0xFFFFFFFF   // aligns to 32bit enum; not a valid pointer type
} HSA_POINTER_TYPE;
1129 
// Information about a pointer: its type, location, CPU/GPU addresses, size and
// the nodes it is registered and mapped to.
typedef struct _HsaPointerInfo {
    HSA_POINTER_TYPE   Type;             // Pointer type
    HSAuint32          Node;             // Node where the memory is located
    HsaMemFlags        MemFlags;         // Only valid for HSA_POINTER_ALLOCATED
    void               *CPUAddress;      // Start address for CPU access
    HSAuint64          GPUAddress;       // Start address for GPU access
    HSAuint64          SizeInBytes;      // Size in bytes
    HSAuint32          NRegisteredNodes; // Number of nodes the memory is registered to
    HSAuint32          NMappedNodes;     // Number of nodes the memory is mapped to
    const HSAuint32    *RegisteredNodes; // Array of registered nodes (presumably NRegisteredNodes entries - confirm)
    const HSAuint32    *MappedNodes;     // Array of mapped nodes (presumably NMappedNodes entries - confirm)
    void               *UserData;        // User data associated with the memory
} HsaPointerInfo;
1143 
// Shared memory handle: an array of eight 32-bit words (32 bytes).
// Being an array type, it decays to a pointer when passed to a function.
typedef HSAuint32 HsaSharedMemoryHandle[8];
1145 
// A range of GPU memory: start address and size.
typedef struct _HsaMemoryRange {
	void               *MemoryAddress;   // Pointer to GPU memory
	HSAuint64          SizeInBytes;      // Size of the above memory, in bytes
} HsaMemoryRange;
1150 
1151 #pragma pack(pop, hsakmttypes_h)
1152 
1153 
1154 #ifdef __cplusplus
1155 }   //extern "C"
1156 #endif
1157 
1158 #endif //_HSAKMTTYPES_H_
1159