/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

/*****************************************************************************\
Notes: Common file that will be used for C99 device enqueue kernels and Runtime CLT's
\*****************************************************************************/

#ifndef DEVICE_ENQUEUE_INTERNAL_TYPES_H
#define DEVICE_ENQUEUE_INTERNAL_TYPES_H

// NOTE: this header uses the type names `uint` and `ulong` but does not define
// them; whatever includes it must already provide both.

#define IGIL_KERNEL_ID_ENQUEUE_MARKER   -1

// IGIL Event Flags
#define IGIL_EVENT_UNUSED               -501
#define IGIL_EVENT_QUEUED               -502
#define IGIL_EVENT_INVALID_HANDLE       0xffffffff

// IGIL Event Types
#define IGIL_EVENT_TYPE_NORMAL          0x0
#define IGIL_EVENT_TYPE_USER            0x1
#define IGIL_EVENT_TYPE_PROFILING       0x2

// Canary values
#define IGIL_MAGIC_NUMBER               0x494E5443
#define IGIL_COMMAND_MAGIC_NUMBER       0x494E544347505500

//!!! Make sure value of this define equals PARALLEL_SCHEDULER_HW_GROUPS in DeviceEnqueue.h
#define MAX_NUMBER_OF_PARALLEL_GPGPU_WALKERS    ( 64 )
#define MAX_NUMBER_OF_ENQUEUE_MARKER            ( 128 )
#define MAX_NUMBER_OF_EVENTS_TO_UPDATE          ( MAX_NUMBER_OF_PARALLEL_GPGPU_WALKERS + MAX_NUMBER_OF_ENQUEUE_MARKER )


// Timestamp written by pipe control needs to be multiplied by 80 ns.
// TODO: this is different on SKL and BXT; code needs to be added to handle them correctly.
#define PROFILING_TIMER_RESOLUTION      80
// Timestamp value is written on 36 bits.
#define PROFILING_MAX_TIMER_VALUE       0xFFFFFFFFF

// Offset of member `y` inside type `x`, obtained by taking the member's address
// through a null `x*` and casting the result to int.
// The whole replacement list is parenthesized so that the expansion behaves as a
// single primary expression (e.g. `sizeof exec_offsetof( T, m )` parses as intended).
#define exec_offsetof( x, y ) ( (int)(&((x*)(0))->y) )

// Union that overlays a `long*` pointer and a `ulong` value on the same storage.
typedef union ptr64_t
{
    long*   m_ptr;
    ulong   m_value;
} IGIL_ptr64_t;

// ND-range description: a dimension count plus three-element arrays for the
// global work offset, global work size and local work size.
typedef struct
{
    uint    m_dispatchDimensions;
    ulong   m_globalWorkOffset[3];
    ulong   m_globalWorkSize[3];
    ulong   m_localWorkSize[3];
} IGIL_ndrange_t;

typedef int IGIL_clk_event_t;

typedef int IGIL_kernel_enqueue_flags_t;

// internal device representation of an event
typedef struct
{
    uint    m_state;            // unused, queued, submitted, running, complete.
    uint    m_eventType;        // user event, profiling enabled...
    int     m_refCount;         // enqueues that depend on this event. free event when all 0: {refCount, numChildren, numDependents}
    int     m_numChildren;      // this event triggers success when all children complete
    int     m_numDependents;    // number of events waiting for this event to reach CL_COMPLETE
    uint    m_parentEvent;      // when this child completes (m_numChildren=0):
                                //   1. set state = CL_COMPLETE
                                //   2. decrement the parent's m_numChildren (if parent valid)
                                //   3. if parent's numChildren == 0, goto #1
    //!!!!! Make sure that profiling variables are aligned to 64 bits. Be extremely cautious when modifying
    //!!!!! this structure: if the alignment is broken, PIPE CONTROL will write to the wrong offset!!!!!!
    ulong   m_profilingCmdStart;    // timestamp when this command starts -> it is event returned by some enqueue and timestamp start for this is after scheduler which enqueued this cmd
    ulong   m_profilingCmdEnd;      // timestamp when this command ends -> timestamp write after kernel directly associated with this event.
    ulong   m_profilingCmdComplete; // timestamp when this event is complete, all children are done, so when this event transitions to CL_COMPLETE.
    ulong   m_pProfiling;           // address to write profiling info to (if enabled) // turned off because of pointer size problems, todo: resolve
} IGIL_DeviceEvent;

// internal device event pool representation
typedef struct
{
    ulong   m_CLcompleteTimestamp;  // only scheduler updates state of events, here is timestamp used for profiling to indicate when this transition happened.
    float   m_TimestampResolution;  // resolution of the timestamp counter
    uint    m_padding;              // padding is needed because of alignment requirements for events
    uint    m_head;                 // pool head point in IGIL_DeviceEvent units (0 means first event)
    uint    m_size;                 // number of events there is space for after m_size
    // variable length part starts here
    // m_size * sizeof(IGIL_DeviceEvent) bytes long
} IGIL_EventPool;

// internal device enqueue command representation
typedef struct
{
    uint                        m_commandSize;              // size in bytes, including variable part and padding to 64bytes and sizeof(IGIL_CommandHeader)
    ulong                       m_magic;                    // 'I' 'N' 'T' 'C' 'G' 'P' 'U' canary
    int                         m_kernelId;                 // this value will be used to choose kernel for GPGPU walker.
    IGIL_ndrange_t              m_range;                    // real version would have dimensions, offsets, multiple ranges
    IGIL_clk_event_t            m_event;                    // handle to event associated with this command, if any
    uint                        m_numScalarArguments;       // number of scalars to patch in curbe including values
    uint                        m_sizeOfScalarArguments;    // size of scalars. TODO : needed?
    uint                        m_numOfLocalPtrSizes;       // number of local sizes passed into m_data
    uint                        m_totalLocalSize;           // total amount of SLM used within kernel.
    uint                        m_numGlobalCapturedBuffer;  // total number of global buffer passes from parent to child
    uint                        m_numDependencies;          // events this command depends on. handles to them will be the first members of m_args
    uint                        m_commandState;             // command state , may not be needed.
    IGIL_kernel_enqueue_flags_t m_enqueueFlags;             // flags that were used during enqueue
    uint                        m_numGlobalArguments;       // total number of global arguments passed as kernel arguments, excluding global pointers.
    uint                        m_data[1];
    // variable length part starts here
    // Event Data:                      # number of events of size sizeof(clk_event_t) store events IDS ( m_numDependencies )
    // Scalar Captured Variable Data:   # number of scalar kernel arguments with values ( m_numScalarArguments )
    // Global UAV Argument Data:        arg number associated with each of the global memory pointer
    //                                  size of each argument is 2B. size: m_numGlobalCapturedBuffer * 2
    // Global UAV Address Data:         address of global mem surfaces: uav address(64 bit).
    //                                  size: m_numGlobalCapturedBuffer * 8
    // Local:                           # number of local surfaces sizes ( DWORD each )
    // Global arguments data :          arg number associated with each of the global memory argument
    // Global argument unique id :      argument unique ID that can identify this resource.
} IGIL_CommandHeader;

// internal device controls/flags
typedef struct
{
    uint    m_StackSize;
    uint    m_StackTop;
    uint    m_PreviousHead;
    uint    m_TotalNumberOfQueues;
    uint    m_SecondLevelBatchOffset;
    uint    m_PreviousNumberOfQueues;
    uint    m_LastScheduleEventNumber;
    uint    m_IsProfilingEnabled;
    uint    m_DebugNextBlockID;
    uint    m_DebugNextBlockGWS;
    uint    m_DebugParentEvent;
    uint    m_SchedulerConstantBufferSize;
    uint    m_SchedulerDSHOffset;
    uint    m_DynamicHeapSizeInBytes;
    uint    m_DynamicHeapStart;
    uint    m_IDTstart;
    uint    m_QstorageSize;
    uint    m_QstorageTop;
    ulong   m_EventTimestampAddress;
    uint    m_CurrentIDToffset;
    uint    m_CurrentDSHoffset;
    uint    m_PreviousStorageTop;
    uint    m_PreviousStackTop;
    uint    m_IDTAfterFirstPhase;
    uint    m_CurrentScheduleEventNumber;
    uint    m_EnqueueMarkerScheduled;
    ulong   m_DummyAtomicOperationPlaceholder;
    uint    m_StartBlockID;
    int     m_SLBENDoffsetInBytes;
    uint    m_BTbaseOffset;
    uint    m_BTmaxSize;
    uint    m_CurrentSSHoffset;
    uint    m_ErrorCode;
    uint    m_CriticalSection;
    uint    m_ParentDSHOffset;      // Offset to DSH in DSHMemInfo.pBuffer
    IGIL_clk_event_t m_EventDependencies[ MAX_NUMBER_OF_EVENTS_TO_UPDATE ];
    ulong   m_CleanupSectionAddress;
    uint    m_CleanupSectionSize;
    uint    m_IsSimulation;
    // temporary place for experiments.
    uint    m_SchedulerEarlyReturnCounter;
    uint    m_SchedulerEarlyReturn;
    uint    Temporary[10];          // for debug
} IGIL_ExecutionControls;

// internal device command queue representation
typedef struct
{
    uint                    m_magic;    // 'I' 'N' 'T' 'C'
    uint                    m_head;     // next free location in the queue
    uint                    m_size;     // size of the queue in bytes
    IGIL_ExecutionControls  m_controls;

    // The header must be aligned to sizeof(IGIL_CommandHeader)

    // variable length part starts here
    // m_size bytes used to fill
} IGIL_CommandQueue;

// One curbe patch entry: parameter type and size plus patch and source offsets.
typedef struct
{
    uint    m_parameterType;
    uint    m_parameterSize;
    uint    m_patchOffset;
    uint    m_sourceOffset;     // for tokens that use 3 dimensions, 0 , 4, 8 indicates dimension
} IGIL_KernelCurbeParams;

// Per-kernel table of offsets; stored as the trailing m_data[] of IGIL_KernelDataHeader.
typedef struct
{
    uint    m_KernelDataOffset;
    uint    m_SamplerHeapOffset;    // Offset to SamplerHeap ( BorderColorState and SamplerStateArray ) on KRS
    uint    m_SamplerParamsOffset;
    uint    m_ConstantBufferOffset;
    uint    m_SSHTokensOffset;
    uint    m_BTSoffset;
    uint    m_BTSize;
} IGIL_KernelAddressData;

// Per-kernel data record, followed by a trailing array of IGIL_KernelCurbeParams.
// Field names such as m_InilineSLMSize and m_CanRunConcurently are spelled as
// declared; they are left untouched because renaming them would change the interface.
typedef struct
{
    uint    m_numberOfCurbeParams;      // number of parameters to patch
    uint    m_numberOfCurbeTokens;
    uint    m_numberOfSamplerStates;
    uint    m_SizeOfSamplerHeap;        // BorderColorState with SamplerStateArray
    uint    m_SamplerBorderColorStateOffsetOnDSH;   // NOTE(review): presumably offset to BorderColorState on block's DSH; the previous
                                                    // comment repeated the SamplerStateArray text of the next field - confirm
    uint    m_SamplerStateArrayOffsetOnDSH;         // Offset to SamplerStateArray on block's DSH
    uint    m_sizeOfConstantBuffer;
    ulong   m_PatchTokensMask;
    ulong   m_ScratchSpacePatchValue;
    uint    m_SIMDSize;
    uint    m_HasBarriers;
    uint    m_RequiredWkgSizes[3];
    uint    m_InilineSLMSize;
    uint    m_NeedLocalIDS;
    uint    m_PayloadSize;
    uint    m_DisablePreemption;
    uint    m_CanRunConcurently;
    IGIL_KernelCurbeParams m_data[1];   // IGIL_KernelCurbeParams
} IGIL_KernelData;

// Header preceding the per-kernel IGIL_KernelAddressData entries.
typedef struct
{
    ulong   m_numberOfKernels;          // number of kernels.
    uint    m_ParentImageDataOffset;
    uint    m_ParentKernelImageCount;
    uint    m_ParentSamplerParamsOffset;
    uint    m_ParentSamplerCount;
    IGIL_KernelAddressData m_data[1];   // offsets for n x kernel data.
} IGIL_KernelDataHeader;

// Image parameters record (the type name keeps its historical spelling).
typedef struct
{
    uint    m_Width;
    uint    m_Height;
    uint    m_Depth;
    uint    m_ArraySize;
    uint    m_NumMipLevels;
    uint    m_NumSamples;
    uint    m_ChannelOrder;
    uint    m_ChannelDataType;
    uint    m_ObjectID;
} IGIL_ImageParamters;


typedef struct
{
    uint    m_ArgID;                // Block's argument id
    uint    m_SamplerStateOffset;   // Offset of specific ( with m_ArgID ) Sampler state on per-block DSH
} IGIL_SamplerParams;

typedef struct
{
    uint    m_ObjectID;             // Sampler Object id

    uint    m_AddressingMode;
    uint    NormalizedCoords;
    uint    CoordinateSnapRequired;
} IGIL_ParentSamplerParams;         // Parent's Sampler Curbe data



#define IGIL_QUEUE_PROLOG_SIZE          ( sizeof(IGIL_CommandQueue) )
#define IGIL_QUEUE_COMMAND_SIZE         ( sizeof(IGIL_CommandHeader) )
#define IGIL_QUEUE_PROLOG_COMMAND_SIZE  ( IGIL_QUEUE_PROLOG_SIZE + IGIL_QUEUE_COMMAND_SIZE )
// IGIL_CommandQueue.m_head must be aligned to sizeof(IGIL_CommandHeader).
// This macro sets m_head to the correct initial value:
//   - if the command header is larger than the queue prolog, the value is sizeof(IGIL_CommandHeader);
//   - otherwise it is PROLOG + ( COMMAND - ( PROLOG + COMMAND ) % COMMAND ), a multiple of COMMAND
//     strictly greater than PROLOG (a prolog that is already a multiple still advances by one
//     full COMMAND).
#define IGIL_DEVICE_QUEUE_HEAD_INIT     ( IGIL_QUEUE_COMMAND_SIZE > IGIL_QUEUE_PROLOG_SIZE ? sizeof(IGIL_CommandHeader) : ( IGIL_QUEUE_PROLOG_SIZE + ( IGIL_QUEUE_COMMAND_SIZE - IGIL_QUEUE_PROLOG_COMMAND_SIZE % IGIL_QUEUE_COMMAND_SIZE ) ) )

#endif