/*========================== begin_copyright_notice ============================

Copyright (C) 2017-2021 Intel Corporation

SPDX-License-Identifier: MIT

============================= end_copyright_notice ===========================*/

//===-  IGILBiF_Device_Enqueue.cl - IGIL device enqueue functions   -===//
//
// This file defines IGIL builtin versions of OpenCL device enqueue.
//
//===----------------------------------------------------------------===//

#ifndef __BIF_DEVICE_ENQUEUE_CL__
#define __BIF_DEVICE_ENQUEUE_CL__

#define __EXECUTION_MODEL_DEBUG
#include "DeviceEnqueueHelpers.h"

extern __constant int __DashGSpecified;

#define exec_offsetof( x, y ) (int)(&((x*)(0))->y)
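// exec_offsetof( x, y ) yields the byte offset of member y within type x,
// equivalent to the standard offsetof() macro, computed from a null base pointer.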

// the profiling timer resolution is a float value passed as its int bit pattern
extern __constant int __ProfilingTimerResolution;
INLINE float __intel__getProfilingTimerResolution()
{
    return as_float(__ProfilingTimerResolution);
}

//===----------------------------------------------------------------------===//
// Internal Helper Functions for Events
//===----------------------------------------------------------------------===//

///////////////////////////////////////////////////////////////////////////
//
// If -g is specified, we are allowed to return a more specific error code
// indicating why enqueue_kernel() failed.
//
INLINE int __intel_ErrorCode(int code)
{
    if (__DashGSpecified)
    {
        return code;
    }
    else
    {
        return CLK_ENQUEUE_FAILURE;
    }
}

__global IGIL_EventPool* IGIL_GetEventPool()
{
    return (__global IGIL_EventPool*)__builtin_IB_get_event_pool();
}

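// The device event records are laid out immediately after the IGIL_EventPool
// header, so (pool + 1) is the start of the event array.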
__global IGIL_DeviceEvent* IGIL_GetDeviceEvents()
{
    __global IGIL_EventPool *pool = IGIL_GetEventPool();

    return (__global IGIL_DeviceEvent *)(pool + 1);
}

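// An event handle is an index into the event pool. A handle is valid only if
// it is below the pool size and is not the invalid-handle sentinel.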
INLINE bool OVERLOADABLE IGIL_Valid_Event( __spirv_DeviceEvent in_event )
{
    // Get the event pool
    __global IGIL_EventPool *pool = IGIL_GetEventPool();

    bool retValue = true;

    if( ( ( int )(__builtin_astype(in_event, __private void*)) >= pool->m_size ) ||
        ( IGIL_EVENT_INVALID_HANDLE == (size_t)__builtin_astype(in_event, __private void*) ) )
    {
        retValue = false;
    }

    return retValue;
}

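// Allocates an event slot from the pool in two phases: first try to bump the
// pool head with a compare-and-swap; once the pool is exhausted, fall back to
// a linear scan for a slot that was previously freed (state == UNUSED).
// Returns the slot index, or IGIL_EVENT_INVALID_HANDLE if none is available.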
INLINE int IGIL_AcquireEvent()
{
    // Get the event pool
    __global IGIL_EventPool *pool = IGIL_GetEventPool();

    // offset into the event data in the pool
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    uint poolSize = pool->m_size;
    uint poolHead = pool->m_head;

    int eventIndex = IGIL_EVENT_INVALID_HANDLE;

    // Get an event index
    while( poolHead < poolSize )
    {
        int attemptIndex = atomic_cmpxchg( &pool->m_head, poolHead, poolHead + 1 );

        if( attemptIndex == poolHead )
        {
            eventIndex = attemptIndex;
            break;
        }
        else
        {
            poolHead = pool->m_head;
        }
    }

    // Event pool has filled up - do a linear search for previously
    // freed events
    if( eventIndex == IGIL_EVENT_INVALID_HANDLE )
    {
        for( int i = 0; i < poolSize; i++ )
        {
            int status = atomic_cmpxchg( &events[i].m_state, IGIL_EVENT_UNUSED, IGIL_EVENT_QUEUED );

            if( IGIL_EVENT_UNUSED == status )
            {
                // found an unused event - claim it and stop searching so we
                // don't mark further slots as QUEUED and leak them
                eventIndex = i;
                break;
            }
        }
    }

    if( eventIndex != IGIL_EVENT_INVALID_HANDLE )
    {
        // creation of event sets reference count to 1
        events[eventIndex].m_refCount = 1;

        // create with no outstanding child
        // act of enqueue using this event will increment num children
        // hence, a kernel with an m_event is its own child; this count is decremented in UpdateEventStatus
        events[eventIndex].m_numChildren = 0;

        // no commands have been made dependent on this event yet, refcount = 0
        events[eventIndex].m_numDependents = 0;

        // track parent event associated with this event
        // when this event is CL_COMPLETE, notify parent
        events[eventIndex].m_parentEvent = IGIL_EVENT_INVALID_HANDLE;

        events[eventIndex].m_eventType = IGIL_EVENT_TYPE_NORMAL;

        // set initial state to submitted
        events[eventIndex].m_state = CL_SUBMITTED;
    }

    return eventIndex;
}

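// Returns an event slot to the pool by marking it UNUSED, making it
// reclaimable by the linear-scan fallback in IGIL_AcquireEvent().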
INLINE void OVERLOADABLE IGIL_FreeEvent( clk_event_t event )
{
    // offset into the event data
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    atomic_xchg( &events[(int)__builtin_astype(event, __private void*)].m_state, IGIL_EVENT_UNUSED );
}

INLINE int OVERLOADABLE IGIL_RetainEvent( __spirv_DeviceEvent in_event )
{
    // offset into the event data
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    int status = CLK_SUCCESS;

    if( IGIL_Valid_Event( in_event ) == false )
    {
        status = CLK_EVENT_ALLOCATION_FAILURE;
    }
    else
    {
        atomic_inc( &events[(int)__builtin_astype(in_event, __private void*)].m_refCount );
    }

    return status;
}

INLINE int OVERLOADABLE IGIL_ReleaseEvent( __spirv_DeviceEvent in_event )
{
    // offset into the event data
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    int status = CLK_SUCCESS;

    if( IGIL_Valid_Event( in_event ) == false )
    {
        status = CLK_EVENT_ALLOCATION_FAILURE;
    }
    else
    {
        atomic_dec( &events[(int)__builtin_astype(in_event, __private void*)].m_refCount );

        // freeing events this aggressively may not be required
        if( ( events[(int)__builtin_astype(in_event, __private void*)].m_refCount <= 0 ) &&
            ( events[(int)__builtin_astype(in_event, __private void*)].m_numChildren <= 0 ) &&
            ( events[(int)__builtin_astype(in_event, __private void*)].m_numDependents <= 0 ) )
        {
            atomic_xchg( &events[(int)__builtin_astype(in_event, __private void*)].m_state, IGIL_EVENT_UNUSED );
        }
    }

    return status;
}

INLINE __spirv_DeviceEvent IGIL_CreateUserEvent()
{
    __spirv_DeviceEvent newEvent = __builtin_astype((__private void*)(size_t)IGIL_AcquireEvent(), __spirv_DeviceEvent);

    if( IGIL_Valid_Event(newEvent) == false )
    {
        // Now what?  OpenCL 2.0 rev5 defines no return code for this function
    }
    else
    {
        __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

        events[(int)__builtin_astype(newEvent, __private void*)].m_eventType = IGIL_EVENT_TYPE_USER;
        events[(int)__builtin_astype(newEvent, __private void*)].m_state = CL_SUBMITTED;
    }

    return newEvent;
}

INLINE void OVERLOADABLE IGIL_SetUserEventStatus( __spirv_DeviceEvent event, int state )
{
    __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();

    if( IGIL_Valid_Event( event ) == false )
    {
        // Now what?  OpenCL 2.0 rev5 defines no return code for this function
    }
    else if( events[(int)__builtin_astype(event, __private void*)].m_eventType & IGIL_EVENT_TYPE_USER )
    {
        // state must be CL_COMPLETE or a negative value
        if( ( state == CL_COMPLETE ) || ( state & 0x80000000 ) )
        {
            events[(int)__builtin_astype(event, __private void*)].m_state = state;
        }
    }
}

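// Attaches a profiling buffer to the event and, if the event has already
// transitioned to CL_COMPLETE, writes two ulong values into it:
//   value[0] = (END - START) scaled by the profiling timer resolution
//   value[1] = (COMPLETE - START) scaled by the profiling timer resolution
// accounting for the GPU timer possibly wrapping around past its max value.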
INLINE void OVERLOADABLE IGIL_CaptureEventProfilingInfo( __spirv_DeviceEvent event, clk_profiling_info name, __global void *value )
{
    int status = CLK_SUCCESS;
    if( IGIL_Valid_Event( event ) == false )
    {
        status = CLK_EVENT_ALLOCATION_FAILURE;
    }
    else if( name != CLK_PROFILING_COMMAND_EXEC_TIME )
    {
        status = CLK_ENQUEUE_FAILURE;
    }
    else
    {
        __global IGIL_DeviceEvent *events = IGIL_GetDeviceEvents();
        events[(int)__builtin_astype(event, __private void*)].m_eventType |= IGIL_EVENT_TYPE_PROFILING;
        events[(int)__builtin_astype(event, __private void*)].m_pProfiling = ( ulong ) value;
        // if this function is called after the event has already transitioned to the
        // CL_COMPLETE state, the timestamps are already present, so update the pointed-to data now
        if( events[(int)__builtin_astype(event, __private void*)].m_state == CL_COMPLETE )
        {
            __global ulong* retValue = ( __global ulong* ) value;

            ulong StartTime                = events[(int)__builtin_astype(event, __private void*)].m_profilingCmdStart;
            ulong EndTime                  = events[(int)__builtin_astype(event, __private void*)].m_profilingCmdEnd;
            ulong CompleteTime             = events[(int)__builtin_astype(event, __private void*)].m_profilingCmdComplete;
            ulong CLEndTransitionTime      = 0;
            ulong CLCompleteTransitionTime = 0;

            // check whether the timer wrapped around by hitting its max value
            if( CompleteTime > StartTime )
            {
                CLEndTransitionTime      = EndTime - StartTime;
                CLCompleteTransitionTime = CompleteTime - StartTime;
            }
            // if we get here, the GPU timer wrapped around to 0; compute the proper delta
            else
            {
                if( EndTime < StartTime )
                {
                    CLEndTransitionTime = PROFILING_MAX_TIMER_VALUE - StartTime + EndTime;
                }
                else
                {
                    CLEndTransitionTime = EndTime - StartTime;
                }
                CLCompleteTransitionTime = PROFILING_MAX_TIMER_VALUE - StartTime + CompleteTime;
            }

            // first value is the END - START timestamp delta
            retValue[ 0 ] = ( ulong )( ( float )CLEndTransitionTime * __intel__getProfilingTimerResolution() );
            // second value is the COMPLETE - START timestamp delta
            retValue[ 1 ] = ( ulong )( ( float )CLCompleteTransitionTime * __intel__getProfilingTimerResolution() );
        }
    }
    return;
}
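
// Worked example of the wraparound math above (illustrative values): with
// PROFILING_MAX_TIMER_VALUE = 1000, StartTime = 990 and CompleteTime = 30,
// the COMPLETE > START test fails, so the delta is computed as
// 1000 - 990 + 30 = 40 ticks, which is then scaled by the timer resolution.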

//===----------------------------------------------------------------------===//
// Internal Helper Functions for Enqueue
//===----------------------------------------------------------------------===//
INLINE __global IGIL_CommandQueue* IGIL_GetCommandQueue( queue_t q )
{
    return __builtin_astype(q, __global IGIL_CommandQueue*);
}

INLINE bool IGIL_ValidCommandQueue( queue_t q )
{
    __global IGIL_CommandQueue *pQueue = IGIL_GetCommandQueue( q );

    if( pQueue == NULL || ( pQueue->m_magic != IGIL_MAGIC_NUMBER ) )
    {
        return false;
    }
    else
    {
        return true;
    }
}

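// Command headers are stored in the queue buffer itself; 'offset' is the byte
// offset of the header from the start of the queue.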
INLINE __global IGIL_CommandHeader* IGIL_GetCommandHeader( queue_t q, uint offset )
{
    __global uchar *pQueueRaw = __builtin_astype(q, __global uchar*);

    __global IGIL_CommandHeader* pCommand = (__global IGIL_CommandHeader*)(pQueueRaw + offset);

    return pCommand;
}

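// Copies private-memory data to global memory one 4-byte word at a time;
// numBytes is expected to be a multiple of 4 (e.g. rounded up via
// IGIL_ComputeRoundedBlockSize below), so any remainder bytes are dropped.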
INLINE void OVERLOADABLE IGIL_MEMCPY_PTOG( __global void* pDst, __private void* pSrc, int numBytes )
{
    numBytes = numBytes >> 2;
    for( int i = 0; i < numBytes; i++ ) {
        ((__global int*)pDst)[i] = ((__private int*)pSrc)[i];
    }
}

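// Rounds size up to the next multiple of 4, e.g. (5 + 3) & ~3 = 8 and
// (8 + 3) & ~3 = 8.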
INLINE int OVERLOADABLE IGIL_ComputeRoundedBlockSize( int size )
{
    return ( 3 + size ) & ~3;
}

INLINE int OVERLOADABLE IGIL_ComputeRoundedCommandAlignment( int size )
{
    // number of padding bytes needed to align size to a multiple of
    // sizeof(IGIL_CommandHeader); yields a full sizeof(IGIL_CommandHeader)
    // of padding when size is already aligned
    return ( sizeof(IGIL_CommandHeader) - ( size % sizeof(IGIL_CommandHeader) ) );
}

INLINE int OVERLOADABLE IGIL_ComputeRoundedCacheline( int size )
{
    // round up to a multiple of a 64-byte cacheline; the mask must clear the
    // low six bits, so it is ~63 rather than ~64
    return ( 63 + size ) & ~63;
}

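// Reserves numBytes of space in the queue buffer by atomically bumping the
// queue head with a compare-and-swap loop, mirroring IGIL_AcquireEvent().
// Returns the byte offset of the reserved space, or -1 on failure.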
INLINE int OVERLOADABLE IGIL_AcquireQueueSpace( queue_t q, uint numBytes )
{
    __global IGIL_CommandQueue *pQueue = IGIL_GetCommandQueue( q );

    int queueSpace = -1;

    if( ( numBytes & 0x7fffffff ) == numBytes )
    {
        uint requestedSize = numBytes;

        // align the head pointer to sizeof(IGIL_CommandHeader) - could the runtime
        // do this once instead of it being checked on every enqueue?
        if( pQueue->m_head == 0 )
        {
            uint startingAlignment = IGIL_DEVICE_QUEUE_HEAD_INIT;
            atomic_cmpxchg( &pQueue->m_head, 0, startingAlignment );
        }

        uint queueHead = pQueue->m_head;
        uint queueSize = pQueue->m_size;

        // request space for this command
        while( ( queueHead < queueSize ) &&
               ( ( queueHead + requestedSize ) < queueSize ) )
        {
            int attemptSpace = atomic_cmpxchg( &pQueue->m_head, queueHead, queueHead + requestedSize );

            if( attemptSpace == queueHead )
            {
                queueSpace = attemptSpace;
                break;
            }
            else
            {
                queueHead = pQueue->m_head;
            }
        }
    }

    return queueSpace;
}

//===----------------------------------------------------------------------===//
// API Entry Points for Events
//===----------------------------------------------------------------------===//

#define to_spirv_event(e) (__builtin_astype(e, __spirv_DeviceEvent))
#define to_ocl_event(e)   (__builtin_astype(e, clk_event_t))

INLINE void OVERLOADABLE retain_event(clk_event_t event)
{
    SPIRV_BUILTIN(RetainEvent, _i64, )(to_spirv_event(event));
}

INLINE void OVERLOADABLE release_event( clk_event_t event )
{
    SPIRV_BUILTIN(ReleaseEvent, _i64, )(to_spirv_event(event));
}

INLINE clk_event_t OVERLOADABLE create_user_event()
{
    return to_ocl_event(SPIRV_BUILTIN(CreateUserEvent, , )());
}

INLINE void OVERLOADABLE set_user_event_status( clk_event_t e, int state )
{
    SPIRV_BUILTIN(SetUserEventStatus, _i64_i32, )(to_spirv_event(e), state);
}

INLINE void OVERLOADABLE capture_event_profiling_info(clk_event_t e, clk_profiling_info name, __global void* value)
{
    SPIRV_BUILTIN(CaptureEventProfilingInfo, _i64_i32_p1i8, )(to_spirv_event(e), name, value);
}

INLINE bool OVERLOADABLE is_valid_event (clk_event_t event)
{
    return SPIRV_BUILTIN(IsValidEvent, _i64, )(to_spirv_event(event));
}

INLINE OVERLOADABLE queue_t get_default_queue()
{
    return __builtin_astype(SPIRV_BUILTIN(GetDefaultQueue, , )(), queue_t);
}
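
// Minimal usage sketch of the entry points above from device-side kernel code
// (illustrative only; MyChildKernel is a hypothetical block body):
//
//   clk_event_t evt = create_user_event();
//   enqueue_kernel( get_default_queue(), CLK_ENQUEUE_FLAGS_NO_WAIT,
//                   ndrange_1D( 16 ), 1, &evt, NULL,
//                   ^{ MyChildKernel(); } );
//   set_user_event_status( evt, CL_COMPLETE );   // unblocks the child kernel
//   release_event( evt );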

#undef exec_offsetof

#endif // __BIF_DEVICE_ENQUEUE_CL__
