1 /*
2  * Copyright (C) 2018-2021 Intel Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  *
6  */
7 
8 #pragma once
9 #include "shared/source/helpers/flush_stamp.h"
10 #include "shared/source/os_interface/os_time.h"
11 #include "shared/source/os_interface/performance_counters.h"
12 #include "shared/source/utilities/arrayref.h"
13 #include "shared/source/utilities/hw_timestamps.h"
14 #include "shared/source/utilities/idlist.h"
15 #include "shared/source/utilities/iflist.h"
16 
17 #include "opencl/source/api/cl_types.h"
18 #include "opencl/source/command_queue/copy_engine_state.h"
19 #include "opencl/source/helpers/base_object.h"
20 #include "opencl/source/helpers/task_information.h"
21 
22 #include <atomic>
23 #include <cstdint>
24 #include <vector>
25 
26 namespace NEO {
27 template <typename TagType>
28 class TagNode;
29 class CommandQueue;
30 class Context;
31 class Device;
32 class TimestampPacketContainer;
33 
34 template <>
35 struct OpenCLObjectMapper<_cl_event> {
36     typedef class Event DerivedType;
37 };
38 
39 class Event : public BaseObject<_cl_event>, public IDNode<Event> {
40   public:
41     enum class ECallbackTarget : uint32_t {
42         Queued = 0,
43         Submitted,
44         Running,
45         Completed,
46         MAX,
47         Invalid
48     };
49 
50     struct Callback : public IFNode<Callback> {
51         typedef void(CL_CALLBACK *ClbFuncT)(cl_event, cl_int, void *);
52 
53         Callback(cl_event event, ClbFuncT clb, cl_int type, void *data)
54             : event(event), callbackFunction(clb), callbackExecutionStatusTarget(type), userData(data) {
55         }
56 
57         void execute() {
58             callbackFunction(event, callbackExecutionStatusTarget, userData);
59         }
60 
61         int32_t getCallbackExecutionStatusTarget() const {
62             return callbackExecutionStatusTarget;
63         }
64 
65         // From OCL spec :
66         //     "If the callback is called as the result of the command associated with
67         //      event being abnormally terminated, an appropriate error code for the error that caused
68         //      the termination will be passed to event_command_exec_status instead."
69         // This function allows to override this value
70         void overrideCallbackExecutionStatusTarget(int32_t newCallbackExecutionStatusTarget) {
71             DEBUG_BREAK_IF(newCallbackExecutionStatusTarget >= 0);
72             callbackExecutionStatusTarget = newCallbackExecutionStatusTarget;
73         }
74 
75       private:
76         cl_event event;
77         ClbFuncT callbackFunction;
78         int32_t callbackExecutionStatusTarget; // minimum event execution status that will triger this callback
79         void *userData;
80     };
81 
82     static const cl_ulong objectMagic = 0x80134213A43C981ALL;
83 
84     Event(CommandQueue *cmdQueue, cl_command_type cmdType,
85           uint32_t taskLevel, uint32_t taskCount);
86 
87     Event(const Event &) = delete;
88     Event &operator=(const Event &) = delete;
89 
90     ~Event() override;
91 
92     void setupBcs(aub_stream::EngineType bcsEngineType);
93     uint32_t peekBcsTaskCountFromCommandQueue();
94 
95     uint32_t getCompletionStamp() const;
96     void updateCompletionStamp(uint32_t taskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp);
97     cl_ulong getDelta(cl_ulong startTime,
98                       cl_ulong endTime);
99     void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; }
100     bool isCPUProfilingPath() const {
101         return profilingCpuPath;
102     }
103 
104     cl_int getEventProfilingInfo(cl_profiling_info paramName,
105                                  size_t paramValueSize,
106                                  void *paramValue,
107                                  size_t *paramValueSizeRet);
108 
109     bool isProfilingEnabled() const { return profilingEnabled; }
110 
111     void setProfilingEnabled(bool profilingEnabled) { this->profilingEnabled = profilingEnabled; }
112 
113     TagNodeBase *getHwTimeStampNode();
114 
115     void addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer);
116     TimestampPacketContainer *getTimestampPacketNodes() const;
117 
118     bool isPerfCountersEnabled() const {
119         return perfCountersEnabled;
120     }
121 
122     void setPerfCountersEnabled(bool perfCountersEnabled) {
123         this->perfCountersEnabled = perfCountersEnabled;
124     }
125 
126     TagNodeBase *getHwPerfCounterNode();
127 
128     std::unique_ptr<FlushStampTracker> flushStamp;
129     std::atomic<uint32_t> taskLevel;
130 
131     void addChild(Event &e);
132 
133     virtual bool setStatus(cl_int status);
134 
135     static cl_int waitForEvents(cl_uint numEvents,
136                                 const cl_event *eventList);
137 
138     void setCommand(std::unique_ptr<Command> newCmd) {
139         UNRECOVERABLE_IF(cmdToSubmit.load());
140         cmdToSubmit.exchange(newCmd.release());
141         eventWithoutCommand = false;
142     }
143     Command *peekCommand() {
144         return cmdToSubmit;
145     }
146 
147     IFNodeRef<Event> *peekChildEvents() {
148         return childEventsToNotify.peekHead();
149     }
150 
151     bool peekHasChildEvents() {
152         return (peekChildEvents() != nullptr);
153     }
154 
155     bool peekHasCallbacks(ECallbackTarget target) {
156         if (target >= ECallbackTarget::MAX) {
157             DEBUG_BREAK_IF(true);
158             return false;
159         }
160         return (callbacks[(uint32_t)target].peekHead() != nullptr);
161     }
162 
163     bool peekHasCallbacks() {
164         for (uint32_t i = 0; i < (uint32_t)ECallbackTarget::MAX; ++i) {
165             if (peekHasCallbacks((ECallbackTarget)i)) {
166                 return true;
167             }
168         }
169         return false;
170     }
171 
172     // return the number of events that are blocking this event
173     uint32_t peekNumEventsBlockingThis() const {
174         return parentCount;
175     }
176 
177     // returns true if event is completed (in terms of definition provided by OCL spec)
178     // Note from OLC spec :
179     //    "A command is considered complete if its execution status
180     //     is CL_COMPLETE or a negative value."
181 
182     bool isStatusCompleted(const int32_t executionStatusSnapshot) {
183         return executionStatusSnapshot <= CL_COMPLETE;
184     }
185 
186     bool updateStatusAndCheckCompletion();
187 
188     // Note from OCL spec :
189     //      "A negative integer value causes all enqueued commands that wait on this user event
190     //       to be terminated."
191     bool isStatusCompletedByTermination(const int32_t executionStatusSnapshot) const {
192         return executionStatusSnapshot < 0;
193     }
194 
195     bool peekIsSubmitted(const int32_t executionStatusSnapshot) const {
196         return executionStatusSnapshot == CL_SUBMITTED;
197     }
198 
199     bool peekIsCmdSubmitted() {
200         return submittedCmd != nullptr;
201     }
202 
203     //commands blocked by user event depencies
204     bool isReadyForSubmission();
205 
206     // adds a callback (execution state change listener) to this event's list of callbacks
207     void addCallback(Callback::ClbFuncT fn, cl_int type, void *data);
208 
209     //returns true on success
210     //if(blocking==false), will return with false instead of blocking while waiting for completion
211     virtual bool wait(bool blocking, bool useQuickKmdSleep);
212 
213     bool isUserEvent() const {
214         return (CL_COMMAND_USER == cmdType);
215     }
216 
217     bool isEventWithoutCommand() const {
218         return eventWithoutCommand;
219     }
220 
221     Context *getContext() {
222         return ctx;
223     }
224 
225     CommandQueue *getCommandQueue() {
226         return cmdQueue;
227     }
228 
229     const CommandQueue *getCommandQueue() const {
230         return cmdQueue;
231     }
232 
233     cl_command_type getCommandType() {
234         return cmdType;
235     }
236 
237     virtual uint32_t getTaskLevel();
238 
239     cl_int peekExecutionStatus() const {
240         return executionStatus;
241     }
242 
243     cl_int updateEventAndReturnCurrentStatus() {
244         updateExecutionStatus();
245         return executionStatus;
246     }
247 
248     bool peekIsBlocked() const {
249         return (peekNumEventsBlockingThis() > 0);
250     }
251 
252     virtual void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus);
253 
254     void updateTaskCount(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) {
255         if (gpgpuTaskCount == CompletionStamp::notReady) {
256             DEBUG_BREAK_IF(true);
257             return;
258         }
259 
260         this->bcsState.taskCount = bcsTaskCount;
261         uint32_t prevTaskCount = this->taskCount.exchange(gpgpuTaskCount);
262         if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > gpgpuTaskCount)) {
263             this->taskCount = prevTaskCount;
264             DEBUG_BREAK_IF(true);
265         }
266     }
267 
268     bool isCurrentCmdQVirtualEvent() {
269         return currentCmdQVirtualEvent;
270     }
271 
272     void setCurrentCmdQVirtualEvent(bool isCurrentVirtualEvent) {
273         currentCmdQVirtualEvent = isCurrentVirtualEvent;
274     }
275 
276     virtual void updateExecutionStatus();
277     void tryFlushEvent();
278 
279     uint32_t peekTaskCount() const {
280         return this->taskCount;
281     }
282 
283     void setQueueTimeStamp(TimeStampData *queueTimeStamp) {
284         this->queueTimeStamp = *queueTimeStamp;
285     };
286 
287     void setQueueTimeStamp();
288     void setSubmitTimeStamp();
289 
290     void setStartTimeStamp();
291     void setEndTimeStamp();
292 
293     void setCmdType(uint32_t cmdType) {
294         this->cmdType = cmdType;
295     }
296 
297     std::vector<Event *> &getParentEvents() { return this->parentEvents; }
298 
299     virtual bool isExternallySynchronized() const {
300         return false;
301     }
302 
303     static bool checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList);
304 
305     static void getBoundaryTimestampValues(TimestampPacketContainer *timestampContainer, uint64_t &globalStartTS, uint64_t &globalEndTS);
306 
307   protected:
308     Event(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType,
309           uint32_t taskLevel, uint32_t taskCount);
310 
311     ECallbackTarget translateToCallbackTarget(cl_int execStatus) {
312         switch (execStatus) {
313         default: {
314             DEBUG_BREAK_IF(true);
315             return ECallbackTarget::Invalid;
316         }
317 
318         case CL_QUEUED:
319             return ECallbackTarget::Queued;
320         case CL_SUBMITTED:
321             return ECallbackTarget::Submitted;
322         case CL_RUNNING:
323             return ECallbackTarget::Running;
324         case CL_COMPLETE:
325             return ECallbackTarget::Completed;
326         }
327     }
328 
329     void calculateSubmitTimestampData();
330     uint64_t getTimeInNSFromTimestampData(const TimeStampData &timestamp) const;
331     bool calcProfilingData();
332     MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS);
333     MOCKABLE_VIRTUAL void synchronizeTaskCount() {
334         while (this->taskCount == CompletionStamp::notReady)
335             ;
336     };
337 
338     // executes all callbacks associated with this event
339     void executeCallbacks(int32_t executionStatus);
340 
341     // transitions event to new execution state
342     // guarantees that newStatus <= oldStatus
343     void transitionExecutionStatus(int32_t newExecutionStatus) const;
344 
345     //vector storing events that needs to be notified when this event is ready to go
346     IFRefList<Event, true, true> childEventsToNotify;
347     void unblockEventsBlockedByThis(int32_t transitionStatus);
348     void submitCommand(bool abortBlockedTasks);
349 
350     bool currentCmdQVirtualEvent;
351     std::atomic<Command *> cmdToSubmit;
352     std::atomic<Command *> submittedCmd;
353     bool eventWithoutCommand = true;
354 
355     Context *ctx;
356     CommandQueue *cmdQueue;
357     cl_command_type cmdType;
358 
359     // callbacks to be executed when this event changes its execution state
360     IFList<Callback, true, true> callbacks[(uint32_t)ECallbackTarget::MAX];
361 
362     // can be accessed only with transitionExecutionState
363     // this is to ensure state consitency event when doning lock-free multithreading
364     // e.g. CL_COMPLETE -> CL_SUBMITTED or CL_SUBMITTED -> CL_QUEUED becomes forbiden
365     mutable std::atomic<int32_t> executionStatus;
366     // Timestamps
367     bool profilingEnabled;
368     bool profilingCpuPath;
369     bool dataCalculated;
370     TimeStampData queueTimeStamp;
371     TimeStampData submitTimeStamp;
372     uint64_t startTimeStamp;
373     uint64_t endTimeStamp;
374     uint64_t completeTimeStamp;
375     CopyEngineState bcsState{};
376     bool perfCountersEnabled;
377     TagNodeBase *timeStampNode = nullptr;
378     TagNodeBase *perfCounterNode = nullptr;
379     std::unique_ptr<TimestampPacketContainer> timestampPacketContainer;
380     //number of events this event depends on
381     std::atomic<int> parentCount;
382     //event parents
383     std::vector<Event *> parentEvents;
384 
385   private:
386     // can be accessed only with updateTaskCount
387     std::atomic<uint32_t> taskCount;
388 };
389 } // namespace NEO
390