/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

//===- IGILBiF_Device_Enqueue.cl - IGIL device enqueue functions -===//
//
// This file defines IGIL builtin versions of OpenCL device enqueue.
//
//===----------------------------------------------------------------===//

#ifndef __BIF_DEVICE_ENQUEUE_CL__
#define __BIF_DEVICE_ENQUEUE_CL__

#define __EXECUTION_MODEL_DEBUG
#include "DeviceEnqueueHelpers.h"

// Patched by the compiler: non-zero when the module was built with -g.
extern __constant int __DashGSpecified;

// offsetof()-style helper for IGIL structures.
#define exec_offsetof( x, y ) (int)(&((x*)(0))->y)

// Profiling timer resolution; the runtime patches the raw bit pattern of a
// float into this int constant (float passed as int).
extern __constant int __ProfilingTimerResolution;

// Reinterpret the patched int constant as the float timer resolution used
// to scale raw GPU timestamps.
INLINE float __intel__getProfilingTimerResolution()
{
    return as_float(__ProfilingTimerResolution);
}

//===----------------------------------------------------------------------===//
// Internal Helper Functions for Events
//===----------------------------------------------------------------------===//

///////////////////////////////////////////////////////////////////////////
//
// If -g is specified, we are allowed to return a more specific error code
// indicating why enqueue_kernel() failed.
//
// Returns `code` verbatim when built with -g, otherwise collapses every
// failure to the generic CLK_ENQUEUE_FAILURE as required by the spec.
INLINE int __intel_ErrorCode(int code)
{
    if (__DashGSpecified)
    {
        return code;
    }
    else
    {
        return CLK_ENQUEUE_FAILURE;
    }
}

// Returns the device-side event pool set up by the runtime.
__global IGIL_EventPool* IGIL_GetEventPool()
{
    return (__global IGIL_EventPool*)__builtin_IB_get_event_pool();
}

// The array of event records lives immediately after the pool header.
__global IGIL_DeviceEvent* IGIL_GetDeviceEvents()
{
    __global IGIL_EventPool *pool = IGIL_GetEventPool();

    return (__global IGIL_DeviceEvent *)(pool + 1);
}

// An event handle is valid when it is not the invalid-handle sentinel and
// its index fits inside the pool.
INLINE bool OVERLOADABLE IGIL_Valid_Event( __spirv_DeviceEvent in_event )
{
    // Get the event pool
    __global IGIL_EventPool *pool = IGIL_GetEventPool();

    bool retValue = true;

    if( ( ( int )(__builtin_astype(in_event, __private void*)) >= pool->m_size ) ||
        ( IGIL_EVENT_INVALID_HANDLE == (size_t)__builtin_astype(in_event, __private void*) ) )
    {
        retValue = false;
    }

    return retValue;
}

// Allocates one event slot from the pool and initializes it to CL_SUBMITTED
// with refcount 1.  Returns the slot index, or IGIL_EVENT_INVALID_HANDLE
// when the pool is exhausted.
INLINE int IGIL_AcquireEvent()
{
    // Get the event pool
    __global IGIL_EventPool *pool = IGIL_GetEventPool();

    // offset into the event data in the pool
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    uint poolSize = pool->m_size;
    uint poolHead = pool->m_head;

    int eventIndex = IGIL_EVENT_INVALID_HANDLE;

    // Fast path: bump the pool head while never-used slots remain.
    while( poolHead < poolSize )
    {
        int attemptIndex = atomic_cmpxchg( &pool->m_head, poolHead, poolHead + 1 );

        if( attemptIndex == poolHead )
        {
            eventIndex = attemptIndex;
            break;
        }
        else
        {
            poolHead = pool->m_head;
        }
    }

    // Event pool has filled up - do a linear search for previously
    // freed events
    if( eventIndex == IGIL_EVENT_INVALID_HANDLE )
    {
        for( uint i = 0; i < poolSize; i++ )
        {
            int status = atomic_cmpxchg( &events[i].m_state, IGIL_EVENT_UNUSED, IGIL_EVENT_QUEUED );

            if( IGIL_EVENT_UNUSED == status )
            {
                // Found an unused slot; the cmpxchg above has already
                // claimed it (state is now IGIL_EVENT_QUEUED).
                // FIX: stop scanning here.  Without this break, every
                // further successful cmpxchg claimed another slot that was
                // never returned to the caller or released, permanently
                // leaking those events.
                eventIndex = (int)i;
                break;
            }
        }
    }

    if( eventIndex != IGIL_EVENT_INVALID_HANDLE )
    {
        // creation of event sets reference count to 1
        events[eventIndex].m_refCount = 1;

        // create with no outstanding child
        // act of enqueue using this event will increment num children
        // hence, a kernel with an m_event is its own child; this count is decremented in UpdateEventStatus
        events[eventIndex].m_numChildren = 0;

        // no commands have been made dependent on this event yet, refcount = 0
        events[eventIndex].m_numDependents = 0;

        // track parent event associated with this event
        // when this event is CL_COMPLETE, notify parent
        events[eventIndex].m_parentEvent = IGIL_EVENT_INVALID_HANDLE;

        events[eventIndex].m_eventType = IGIL_EVENT_TYPE_NORMAL;

        // set initial state to submitted
        events[eventIndex].m_state = CL_SUBMITTED;
    }

    return eventIndex;
}

// Returns the event's slot to the pool by marking it unused.
INLINE void OVERLOADABLE IGIL_FreeEvent( clk_event_t event )
{
    // offset into the event data
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    atomic_xchg( &events[(int)__builtin_astype(event, __private void*)].m_state, IGIL_EVENT_UNUSED );
}

// Increments the event's reference count.
// Returns CLK_SUCCESS, or CLK_EVENT_ALLOCATION_FAILURE for an invalid handle.
INLINE int OVERLOADABLE IGIL_RetainEvent( __spirv_DeviceEvent in_event )
{
    // offset into the event data
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    int status = CLK_SUCCESS;

    if( IGIL_Valid_Event( in_event ) == false )
    {
        status = CLK_EVENT_ALLOCATION_FAILURE;
    }
    else
    {
        atomic_inc( &events[(int)__builtin_astype(in_event, __private void*)].m_refCount );
    }

    return status;
}

// Decrements the event's reference count and frees the slot once no
// references, children, or dependents remain.
// Returns CLK_SUCCESS, or CLK_EVENT_ALLOCATION_FAILURE for an invalid handle.
INLINE int OVERLOADABLE IGIL_ReleaseEvent( __spirv_DeviceEvent in_event )
{
    // offset into the event data
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    int status = CLK_SUCCESS;

    if( IGIL_Valid_Event( in_event ) == false )
    {
        status = CLK_EVENT_ALLOCATION_FAILURE;
    }
    else
    {
        atomic_dec( &events[(int)__builtin_astype(in_event, __private void*)].m_refCount );

        // May not be required to be this aggressive freeing events
        // NOTE(review): the counters are re-read non-atomically after the
        // decrement, so two concurrent releases may both (or neither) see
        // zero — confirm this race is acceptable before tightening.
        if( ( events[(int)__builtin_astype(in_event, __private void*)].m_refCount <= 0 ) &&
            ( events[(int)__builtin_astype(in_event, __private void*)].m_numChildren <= 0 ) &&
            ( events[(int)__builtin_astype(in_event, __private void*)].m_numDependents <= 0 ) )
        {
            atomic_xchg( &events[(int)__builtin_astype(in_event, __private void*)].m_state, IGIL_EVENT_UNUSED );
        }
    }

    return status;
}

// Creates a user event in the CL_SUBMITTED state.  On pool exhaustion the
// returned handle is invalid (the spec defines no error return here).
INLINE __spirv_DeviceEvent IGIL_CreateUserEvent()
{
    __spirv_DeviceEvent newEvent = __builtin_astype((__private void*)(size_t)IGIL_AcquireEvent(), __spirv_DeviceEvent);

    if( IGIL_Valid_Event(newEvent) == false)
    {
        // Now what? OpenCL 2.0 rev5 defines no return code for this function
    }
    else
    {
        __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

        events[(int)__builtin_astype(newEvent, __private void*)].m_eventType = IGIL_EVENT_TYPE_USER;
        events[(int)__builtin_astype(newEvent, __private void*)].m_state = CL_SUBMITTED;
    }

    return newEvent;
}

// Sets the execution status of a user event.  Only user events may be set,
// and only to CL_COMPLETE or a negative (error) value.
INLINE void OVERLOADABLE IGIL_SetUserEventStatus( __spirv_DeviceEvent event, int state )
{
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    if( IGIL_Valid_Event( event ) == false )
    {
        // Now what? OpenCL 2.0 rev5 defines no return code for this function
    }
    else if( events[(int)__builtin_astype(event, __private void*)].m_eventType & IGIL_EVENT_TYPE_USER )
    {
        // state must be CL_COMPLETE or a negative value (sign bit set)
        if( ( state == CL_COMPLETE ) || ( state & 0x80000000 ) )
        {
            events[(int)__builtin_astype(event, __private void*)].m_state = state;
        }
    }
}

// Attaches a profiling destination buffer to the event and, when the event
// already completed, converts and stores the captured timestamps:
//   value[0] = (END - START)      * timer resolution
//   value[1] = (COMPLETE - START) * timer resolution
// Only CLK_PROFILING_COMMAND_EXEC_TIME is supported; invalid events and
// other names are silently ignored (the function returns void, so the
// original dead local status code has been dropped).
INLINE void OVERLOADABLE IGIL_CaptureEventProfilingInfo( __spirv_DeviceEvent event, clk_profiling_info name, __global void *value )
{
    if( IGIL_Valid_Event( event ) == false )
    {
        return;
    }
    if( name != CLK_PROFILING_COMMAND_EXEC_TIME )
    {
        return;
    }

    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();
    events[(int)__builtin_astype(event, __private void*)].m_eventType |= IGIL_EVENT_TYPE_PROFILING;
    events[(int)__builtin_astype(event, __private void*)].m_pProfiling = ( ulong ) value;

    // If this function is called after the event already transitioned to
    // CL_COMPLETE, the timestamps are present - update the pointed-to data now.
    if( events[(int)__builtin_astype(event, __private void*)].m_state == CL_COMPLETE )
    {
        __global ulong* retValue = ( __global ulong* ) value;

        ulong StartTime    = events[(int)__builtin_astype(event, __private void*)].m_profilingCmdStart;
        ulong EndTime      = events[(int)__builtin_astype(event, __private void*)].m_profilingCmdEnd;
        ulong CompleteTime = events[(int)__builtin_astype(event, __private void*)].m_profilingCmdComplete;
        ulong CLEndTransitionTime      = 0;
        ulong CLCompleteTransitionTime = 0;

        // Check whether the timer wrapped by hitting its max value.
        if( CompleteTime > StartTime )
        {
            CLEndTransitionTime      = EndTime - StartTime;
            CLCompleteTransitionTime = CompleteTime - StartTime;
        }
        // The GPU timer reset to 0 between START and COMPLETE - compute the
        // proper deltas across the wrap point.
        else
        {
            if( EndTime < StartTime )
            {
                CLEndTransitionTime = PROFILING_MAX_TIMER_VALUE - StartTime + EndTime;
            }
            else
            {
                CLEndTransitionTime = EndTime - StartTime;
            }
            CLCompleteTransitionTime = PROFILING_MAX_TIMER_VALUE - StartTime + CompleteTime;
        }

        // first value is END - START timestamp
        retValue[ 0 ] = ( ulong )( ( float )CLEndTransitionTime * __intel__getProfilingTimerResolution() );
        // second value is COMPLETE - START timestamp
        retValue[ 1 ] = ( ulong )( ( float )CLCompleteTransitionTime * __intel__getProfilingTimerResolution() );
    }
    return;
}

//===----------------------------------------------------------------------===//
// Internal Helper Functions for Enqueue
//===----------------------------------------------------------------------===//

// A queue_t is a pointer to the IGIL queue structure in disguise.
INLINE __global IGIL_CommandQueue* IGIL_GetCommandQueue( queue_t q )
{
    return __builtin_astype(q, __global IGIL_CommandQueue*);
}

// A queue is valid when it is non-NULL and carries the IGIL magic number.
INLINE bool IGIL_ValidCommandQueue( queue_t q )
{
    __global IGIL_CommandQueue *pQueue = IGIL_GetCommandQueue( q );

    if( pQueue == NULL || ( pQueue->m_magic != IGIL_MAGIC_NUMBER ))
    {
        return false;
    }
    else
    {
        return true;
    }
}

// Returns the command header located `offset` bytes into the queue buffer.
INLINE __global IGIL_CommandHeader* IGIL_GetCommandHeader( queue_t q, uint offset )
{
    __global uchar *pQueueRaw = __builtin_astype(q, __global uchar*);

    __global IGIL_CommandHeader* pCommand = (__global IGIL_CommandHeader*)(pQueueRaw + offset);

    return pCommand;
}

// Copies numBytes from private to global memory in 4-byte words.
// NOTE(review): any trailing 1-3 bytes are dropped - presumably callers
// always pass sizes rounded by IGIL_ComputeRoundedBlockSize; confirm.
INLINE void OVERLOADABLE IGIL_MEMCPY_PTOG( __global void* pDst, __private void* pSrc, int numBytes )
{
    numBytes = numBytes >> 2;
    for( int i = 0; i < numBytes; i++ ) {
        ((__global int*)pDst)[i] = ((__private int*)pSrc)[i];
    }
}

// Rounds size up to a multiple of 4 bytes.
INLINE int OVERLOADABLE IGIL_ComputeRoundedBlockSize( int size )
{
    return ( 3 + size ) & ~3;
}

// Pad needed to align `size` to a multiple of sizeof(IGIL_CommandHeader).
// NOTE(review): when size is already aligned this returns a full
// sizeof(IGIL_CommandHeader) rather than 0 - verify that callers expect
// this extra padding before changing it.
INLINE int OVERLOADABLE IGIL_ComputeRoundedCommandAlignment( int size )
{
    // align to multiple of an IGIL_CommandHeader.
    return ( sizeof(IGIL_CommandHeader) - ( size % sizeof(IGIL_CommandHeader) ) );
}

// Rounds size up to a multiple of a 64-byte cacheline.
// FIX: the original returned ( 64 + size ) & ~64; ~64 clears only bit 6,
// so the result was not cacheline aligned (e.g. size 1 -> 65 & ~64 = 1).
// The correct round-up mask is ~63, matching the (3+size)&~3 idiom above.
INLINE int OVERLOADABLE IGIL_ComputeRoundedCacheline( int size )
{
    return ( 63 + size ) & ~63;
}

// Reserves numBytes of space in the queue buffer by atomically advancing
// the queue head.  Returns the byte offset of the reservation, or -1 when
// the queue is full or numBytes is out of range.
INLINE int OVERLOADABLE IGIL_AcquireQueueSpace( queue_t q, uint numBytes )
{
    __global IGIL_CommandQueue *pQueue = IGIL_GetCommandQueue( q );

    int queueSpace = -1;

    // Reject requests with the sign bit set (offset is returned as int).
    if( ( numBytes & 0x7fffffff ) == numBytes )
    {
        uint requestedSize = numBytes;

        // align head pointer to sizeof(IGIL_CommandHeader) - Can the runtime do this
        // instead of this being checked for every enqueue?
        if( pQueue->m_head == 0 )
        {
            uint startingAlignment = IGIL_DEVICE_QUEUE_HEAD_INIT;
            atomic_cmpxchg( &pQueue->m_head, 0, startingAlignment );
        }

        uint queueHead = pQueue->m_head;
        uint queueSize = pQueue->m_size;

        // request space for this command
        while( ( queueHead < queueSize ) &&
               ( ( queueHead + requestedSize ) < queueSize) )
        {
            int attemptSpace = atomic_cmpxchg( &pQueue->m_head, queueHead, queueHead + requestedSize );

            if( attemptSpace == queueHead )
            {
                queueSpace = attemptSpace;
                break;
            }
            else
            {
                queueHead = pQueue->m_head;
            }
        }
    }

    return queueSpace;
}

//===----------------------------------------------------------------------===//
// API Entry Points for Events
//===----------------------------------------------------------------------===//

#define to_spirv_event(e) (__builtin_astype(e, __spirv_DeviceEvent))
#define to_ocl_event(e)   (__builtin_astype(e, clk_event_t))

INLINE void OVERLOADABLE retain_event(clk_event_t event)
{
    SPIRV_BUILTIN(RetainEvent, _i64, )(to_spirv_event(event));
}

INLINE void OVERLOADABLE release_event( clk_event_t event )
{
    SPIRV_BUILTIN(ReleaseEvent, _i64, )(to_spirv_event(event));
}

INLINE clk_event_t OVERLOADABLE create_user_event()
{
    return to_ocl_event(SPIRV_BUILTIN(CreateUserEvent, , )());
}

INLINE void OVERLOADABLE set_user_event_status( clk_event_t e, int state )
{
    SPIRV_BUILTIN(SetUserEventStatus, _i64_i32, )(to_spirv_event(e), state);
}

INLINE void OVERLOADABLE capture_event_profiling_info(clk_event_t e, clk_profiling_info name, __global void* value)
{
    SPIRV_BUILTIN(CaptureEventProfilingInfo, _i64_i32_p1i8, )(to_spirv_event(e), name, value);
}

INLINE bool OVERLOADABLE is_valid_event (clk_event_t event)
{
    return SPIRV_BUILTIN(IsValidEvent, _i64, )(to_spirv_event(event));
}

INLINE OVERLOADABLE queue_t get_default_queue()
{
    return __builtin_astype(SPIRV_BUILTIN(GetDefaultQueue, , )(), queue_t);
}

#undef exec_offsetof

#endif // __BIF_DEVICE_ENQUEUE_CL__