1 /*========================== begin_copyright_notice ============================
2 
3 Copyright (C) 2017-2021 Intel Corporation
4 
5 SPDX-License-Identifier: MIT
6 
7 ============================= end_copyright_notice ===========================*/
8 
9 /*****************************************************************************\
10 Notes: Common file that will be used for C99 device enqueue kernels and Runtime CLT's
11 \*****************************************************************************/
12 
13 #ifndef DEVICE_ENQUEUE_INTERNAL_TYPES_H
14 #define DEVICE_ENQUEUE_INTERNAL_TYPES_H
15 
// Kernel id value reserved for enqueue markers.
// Negative replacement lists are parenthesized so the macros always expand
// to a single operand, whatever expression they are pasted into.
#define IGIL_KERNEL_ID_ENQUEUE_MARKER               ( -1 )

// IGIL Event Flags
#define IGIL_EVENT_UNUSED                           ( -501 )
#define IGIL_EVENT_QUEUED                           ( -502 )
#define IGIL_EVENT_INVALID_HANDLE                   0xffffffff

// IGIL Event Types
#define IGIL_EVENT_TYPE_NORMAL                      0x0
#define IGIL_EVENT_TYPE_USER                        0x1
#define IGIL_EVENT_TYPE_PROFILING                   0x2

// Canary values
// 0x49 0x4E 0x54 0x43 -> ASCII 'I' 'N' 'T' 'C'
#define IGIL_MAGIC_NUMBER                           0x494E5443
// 0x49 0x4E 0x54 0x43 0x47 0x50 0x55 0x00 -> ASCII 'I' 'N' 'T' 'C' 'G' 'P' 'U' followed by a zero byte
#define IGIL_COMMAND_MAGIC_NUMBER                   0x494E544347505500
31 
//!!! MAX_NUMBER_OF_PARALLEL_GPGPU_WALKERS must stay equal to PARALLEL_SCHEDULER_HW_GROUPS in DeviceEnqueue.h
#define MAX_NUMBER_OF_PARALLEL_GPGPU_WALKERS        ( 64 )
#define MAX_NUMBER_OF_ENQUEUE_MARKER                ( 128 )
// Sum of the two limits above: one slot per parallel walker plus one per enqueue marker.
#define MAX_NUMBER_OF_EVENTS_TO_UPDATE              ( MAX_NUMBER_OF_ENQUEUE_MARKER + MAX_NUMBER_OF_PARALLEL_GPGPU_WALKERS )


// A timestamp written by pipe control needs to be multiplied by 80 ns.
// TODO: this is different on SKL and BXT; code needs to be added to handle them correctly.
#define PROFILING_TIMER_RESOLUTION                  80
// The timestamp value is written on 36 bits.
#define PROFILING_MAX_TIMER_VALUE                   0xFFFFFFFFF
42 
// Byte offset of member y inside type x, yielded as an int.
//   x - a type name (it is used as "x*", so it cannot be parenthesized)
//   y - a member designator of x
// The offset is computed as a difference of two char pointers rather than by
// casting the member address straight to int: a direct pointer-to-int cast
// truncates on targets with 64-bit pointers and is rejected by C++ compilers.
// The whole expansion is parenthesized so it behaves as a single operand.
#define exec_offsetof( x, y ) ( (int)( (char*)&( ( (x*)0 )->y ) - (char*)0 ) )
44 
45 typedef union ptr64_t
46 {
47     long*  m_ptr;
48     ulong  m_value;
49 } IGIL_ptr64_t;
50 
51 typedef struct
52 {
53     uint  m_dispatchDimensions;
54     ulong m_globalWorkOffset[3];
55     ulong m_globalWorkSize[3];
56     ulong m_localWorkSize[3];
57 } IGIL_ndrange_t;
58 
// Integer handle used to refer to a device event.
typedef int IGIL_clk_event_t;

// Integer type holding the flags supplied with an enqueue.
typedef int IGIL_kernel_enqueue_flags_t;
62 
63 // internal device representation of an event
64 typedef struct
65 {
66     uint m_state;                   // unused, queued, submitted, running, complete.
67     uint m_eventType;               // user event, profiling enabled...
68     int m_refCount;                 // enqueues that depend on this event. free event when all 0: {refCount, numChildren, numDependents}
69     int m_numChildren;              // this event triggers success when all children complete
70     int m_numDependents;            // number of events waiting for this event to reach CL_COMPLETE
71     uint m_parentEvent;             // when this child completes (m_numChildren=0):
72                                     //   1. set state = CL_COMPLETE
73                                     //   2. decrement the parent's m_numChildren (if parent valid)
74                                     //   3. if parent's numChildren == 0, goto #1
75     //!!!!! make sure that profiling variables are aligned to 64 bits, be extremly precaucious when modifiying this structure, in case of broken alignement PIPE CONTROL will write to wrong offset!!!!!!
76     ulong m_profilingCmdStart;      // timestamp when this command starts -> it is event returned by some enqueue and timestamp start for this is after scheduler which enqueued this cmd
77     ulong m_profilingCmdEnd;        // timestamp when this command ends -> timestamp write after kernel directly associated with this event.
78     ulong m_profilingCmdComplete;   // timestamp when this event is complete, all childs are done, so when this event transitions to CL_COMPLETE.
79     ulong m_pProfiling;      // address to write profiling info to (if enabled) //turned off becasue of pointer size problems todo:resolve
80 } IGIL_DeviceEvent;
81 
82 // internal device event pool representation
83 typedef struct
84 {
85     ulong m_CLcompleteTimestamp;    // only scheduler updates state of events, here is timestamp used for profiling to indicate when this transition happened.
86     float m_TimestampResolution;    // resolution of the timestamp counter
87     uint m_padding;                 // padding is needed because of alignment requirements for events
88     uint m_head;                    // pool head point in IGIL_DeviceEvent units (0 means first event)
89     uint m_size;                    // number of events there is space for after m_size
90     // variable legnth part starts here
91     //  m_size * sizeof(IGIL_DeviceEvent) bytes long
92 } IGIL_EventPool;
93 
94 // internal device enqueue command representation
95 typedef struct
96 {
97     uint   m_commandSize;           // size in bytes, including variable part and padding to 64bytes and sizeof(IGIL_CommandHeader)
98     ulong  m_magic;                 // 'I' 'N' 'T' 'C' 'G' 'P' 'U' canary
99     int    m_kernelId;              // this value will be used to choose kernel for GPGPU walker.
100     IGIL_ndrange_t    m_range;      // real version would have dimensions, offsets, multiple ranges
101     IGIL_clk_event_t  m_event;      // handle to event associated with this command, if any
102     uint   m_numScalarArguments;    // number of scalars to patch in curbe including values
103     uint   m_sizeOfScalarArguments; // size of scalars. TODO : needed?
104     uint   m_numOfLocalPtrSizes;    // number of local sizes passed into m_data
105     uint   m_totalLocalSize;        // total amount of SLM used within kernel.
106     uint   m_numGlobalCapturedBuffer; // total number of global buffer passes from parent to child
107     uint   m_numDependencies;       // events this command depends on. handles to them will be the first members of m_args
108     uint   m_commandState;          // command state , may not be needed.
109     IGIL_kernel_enqueue_flags_t m_enqueueFlags;    // flags that were used during enqueue
110     uint   m_numGlobalArguments;    //total number of global arguments passed as kernel arguments, excluding global pointers.
111     uint   m_data[1];
112     // variable length part starts here
113     //   Event Data: # number of events of size sizeof(clk_event_t) store events IDS ( m_numDependencies )
114     //   Scalar Captured Variable Data: # number of scalar kernel arguments with values ( m_numScalarArguments )
115     //   Global UAV Argument Data: arg number associated with each of the global memory pointer
116     //                             size of each argument is 2B. size: m_numGlobalCapturedBuffer * 2
117     //   Global UAV Address Data: address of global mem surfaces: uav address(64 bit).
118     //                        size: m_numGlobalCapturedBuffer * 8
119     //   Local: # number of local surfaces sizes ( DWORD each )
120     //   Global arguments data : arn number associated with each of the global memory argument
121     //   Global argument unique id : argument unique ID that can identify this resource.
122 } IGIL_CommandHeader;
123 
124 // intneral device controls/flags
125 typedef struct
126 {
127     uint m_StackSize;
128     uint m_StackTop;
129     uint m_PreviousHead;
130     uint m_TotalNumberOfQueues;
131     uint m_SecondLevelBatchOffset;
132     uint m_PreviousNumberOfQueues;
133     uint m_LastScheduleEventNumber;
134     uint m_IsProfilingEnabled;
135     uint m_DebugNextBlockID;
136     uint m_DebugNextBlockGWS;
137     uint m_DebugParentEvent;
138     uint m_SchedulerConstantBufferSize;
139     uint m_SchedulerDSHOffset;
140     uint m_DynamicHeapSizeInBytes;
141     uint m_DynamicHeapStart;
142     uint m_IDTstart;
143     uint m_QstorageSize;
144     uint m_QstorageTop;
145     ulong m_EventTimestampAddress;
146     uint m_CurrentIDToffset;
147     uint m_CurrentDSHoffset;
148     uint m_PreviousStorageTop;
149     uint m_PreviousStackTop;
150     uint m_IDTAfterFirstPhase;
151     uint m_CurrentScheduleEventNumber;
152     uint m_EnqueueMarkerScheduled;
153     ulong m_DummyAtomicOperationPlaceholder;
154     uint m_StartBlockID;
155     int m_SLBENDoffsetInBytes;
156     uint m_BTbaseOffset;
157     uint m_BTmaxSize;
158     uint m_CurrentSSHoffset;
159     uint m_ErrorCode;
160     uint m_CriticalSection;
161     uint m_ParentDSHOffset;         // Offset to DSH in DSHMemInfo.pBuffer
162     IGIL_clk_event_t m_EventDependencies[ MAX_NUMBER_OF_EVENTS_TO_UPDATE ];
163     ulong m_CleanupSectionAddress;
164     uint m_CleanupSectionSize;
165     uint m_IsSimulation;
166     //temporary place for experiments.
167     uint m_SchedulerEarlyReturnCounter;
168     uint m_SchedulerEarlyReturn;
169     uint Temporary[10];//for debug
170 } IGIL_ExecutionControls;
171 
172 // internal device command queue representation
173 typedef struct
174 {
175     uint m_magic;            // 'I' 'N' 'T' C'
176     uint m_head;             // next free location in the queue
177     uint m_size;             // size of the queue in bytes
178     IGIL_ExecutionControls m_controls;
179 
180     // The header must be aligned to sizeof(IGIL_CommandHeader)
181 
182     // variable length part starts here
183     // m_size bytes used to fill
184 } IGIL_CommandQueue;
185 
186 typedef struct
187 {
188     uint m_parameterType;
189     uint m_parameterSize;
190     uint m_patchOffset;
191     uint m_sourceOffset; // for tokens that use 3 dimensions, 0 , 4, 8 indicates dimension
192 } IGIL_KernelCurbeParams;
193 typedef struct
194 {
195     uint            m_KernelDataOffset;
196     uint            m_SamplerHeapOffset;       // Offset to SamplerHeap ( BorderColorState and SamplerStateArray ) on KRS
197     uint            m_SamplerParamsOffset;
198     uint            m_ConstantBufferOffset;
199     uint            m_SSHTokensOffset;
200     uint            m_BTSoffset;
201     uint            m_BTSize;
202 }IGIL_KernelAddressData;
203 typedef struct
204 {
205     uint                      m_numberOfCurbeParams; // number of paramters to patch
206     uint                      m_numberOfCurbeTokens;
207     uint                      m_numberOfSamplerStates;
208     uint                      m_SizeOfSamplerHeap;              // BorderColorState with SamplerStateArray
209     uint                      m_SamplerBorderColorStateOffsetOnDSH;    // Offset to SamplerStateArray on block's DSH
210     uint                      m_SamplerStateArrayOffsetOnDSH;    // Offset to SamplerStateArray on block's DSH
211     uint                      m_sizeOfConstantBuffer;
212     ulong                     m_PatchTokensMask;
213     ulong                     m_ScratchSpacePatchValue;
214     uint                      m_SIMDSize;
215     uint                      m_HasBarriers;
216     uint                      m_RequiredWkgSizes[3];
217     uint                      m_InilineSLMSize;
218     uint                      m_NeedLocalIDS;
219     uint                      m_PayloadSize;
220     uint                      m_DisablePreemption;
221     uint                      m_CanRunConcurently;
222     IGIL_KernelCurbeParams    m_data[1]; //IGIL_KernelCurbeParams
223 } IGIL_KernelData;
224 
225 typedef struct
226 {
227     ulong                  m_numberOfKernels; //number of kernels.
228     uint                   m_ParentImageDataOffset;
229     uint                   m_ParentKernelImageCount;
230     uint                   m_ParentSamplerParamsOffset;
231     uint                   m_ParentSamplerCount;
232     IGIL_KernelAddressData m_data[1]; //offsets for n x kernel data.
233 } IGIL_KernelDataHeader;
234 
235 typedef struct
236 {
237     uint                   m_Width;
238     uint                   m_Height;
239     uint                   m_Depth;
240     uint                   m_ArraySize;
241     uint                   m_NumMipLevels;
242     uint                   m_NumSamples;
243     uint                   m_ChannelOrder;
244     uint                   m_ChannelDataType;
245     uint                   m_ObjectID;
246 } IGIL_ImageParamters;
247 
248 
249 typedef struct
250 {
251     uint                   m_ArgID;                     // Block's argument id
252     uint                   m_SamplerStateOffset;        // Offset of specific ( with m_ArgID ) Sampler state on per-block DSH
253 } IGIL_SamplerParams;
254 
255 typedef struct
256 {
257     uint                   m_ObjectID;                  // Sampler Object id
258 
259     uint                   m_AddressingMode;
260     uint                   NormalizedCoords;
261     uint                   CoordinateSnapRequired;
262 } IGIL_ParentSamplerParams;                             // Parent's Sampler Curbe data
263 
264 
265 
// Size of the fixed queue prolog and of one command header, in bytes.
#define IGIL_QUEUE_PROLOG_SIZE          ( sizeof( IGIL_CommandQueue ) )
#define IGIL_QUEUE_COMMAND_SIZE         ( sizeof( IGIL_CommandHeader ) )
#define IGIL_QUEUE_PROLOG_COMMAND_SIZE  ( IGIL_QUEUE_PROLOG_SIZE + IGIL_QUEUE_COMMAND_SIZE )
// IGIL_CommandQueue.m_head must be aligned to sizeof(IGIL_CommandHeader).
// This macro yields the correct initial value for m_head:
//   - one command header size, when the header is larger than the prolog;
//   - otherwise the prolog size plus
//     ( header size - ( prolog size + header size ) % header size ).
#define IGIL_DEVICE_QUEUE_HEAD_INIT                                                 \
    ( IGIL_QUEUE_COMMAND_SIZE > IGIL_QUEUE_PROLOG_SIZE                              \
        ? IGIL_QUEUE_COMMAND_SIZE                                                   \
        : ( IGIL_QUEUE_PROLOG_SIZE +                                                \
            ( IGIL_QUEUE_COMMAND_SIZE - IGIL_QUEUE_PROLOG_COMMAND_SIZE % IGIL_QUEUE_COMMAND_SIZE ) ) )
272 
273 #endif
274