1 /* 2 * Copyright (C) 2018-2021 Intel Corporation 3 * 4 * SPDX-License-Identifier: MIT 5 * 6 */ 7 8 #pragma once 9 #include "shared/source/helpers/flush_stamp.h" 10 #include "shared/source/os_interface/os_time.h" 11 #include "shared/source/os_interface/performance_counters.h" 12 #include "shared/source/utilities/arrayref.h" 13 #include "shared/source/utilities/hw_timestamps.h" 14 #include "shared/source/utilities/idlist.h" 15 #include "shared/source/utilities/iflist.h" 16 17 #include "opencl/source/api/cl_types.h" 18 #include "opencl/source/command_queue/copy_engine_state.h" 19 #include "opencl/source/helpers/base_object.h" 20 #include "opencl/source/helpers/task_information.h" 21 22 #include <atomic> 23 #include <cstdint> 24 #include <vector> 25 26 namespace NEO { 27 template <typename TagType> 28 class TagNode; 29 class CommandQueue; 30 class Context; 31 class Device; 32 class TimestampPacketContainer; 33 34 template <> 35 struct OpenCLObjectMapper<_cl_event> { 36 typedef class Event DerivedType; 37 }; 38 39 class Event : public BaseObject<_cl_event>, public IDNode<Event> { 40 public: 41 enum class ECallbackTarget : uint32_t { 42 Queued = 0, 43 Submitted, 44 Running, 45 Completed, 46 MAX, 47 Invalid 48 }; 49 50 struct Callback : public IFNode<Callback> { 51 typedef void(CL_CALLBACK *ClbFuncT)(cl_event, cl_int, void *); 52 53 Callback(cl_event event, ClbFuncT clb, cl_int type, void *data) 54 : event(event), callbackFunction(clb), callbackExecutionStatusTarget(type), userData(data) { 55 } 56 57 void execute() { 58 callbackFunction(event, callbackExecutionStatusTarget, userData); 59 } 60 61 int32_t getCallbackExecutionStatusTarget() const { 62 return callbackExecutionStatusTarget; 63 } 64 65 // From OCL spec : 66 // "If the callback is called as the result of the command associated with 67 // event being abnormally terminated, an appropriate error code for the error that caused 68 // the termination will be passed to event_command_exec_status instead." 69 // This function allows to override this value 70 void overrideCallbackExecutionStatusTarget(int32_t newCallbackExecutionStatusTarget) { 71 DEBUG_BREAK_IF(newCallbackExecutionStatusTarget >= 0); 72 callbackExecutionStatusTarget = newCallbackExecutionStatusTarget; 73 } 74 75 private: 76 cl_event event; 77 ClbFuncT callbackFunction; 78 int32_t callbackExecutionStatusTarget; // minimum event execution status that will triger this callback 79 void *userData; 80 }; 81 82 static const cl_ulong objectMagic = 0x80134213A43C981ALL; 83 84 Event(CommandQueue *cmdQueue, cl_command_type cmdType, 85 uint32_t taskLevel, uint32_t taskCount); 86 87 Event(const Event &) = delete; 88 Event &operator=(const Event &) = delete; 89 90 ~Event() override; 91 92 void setupBcs(aub_stream::EngineType bcsEngineType); 93 uint32_t peekBcsTaskCountFromCommandQueue(); 94 95 uint32_t getCompletionStamp() const; 96 void updateCompletionStamp(uint32_t taskCount, uint32_t bcsTaskCount, uint32_t tasklevel, FlushStamp flushStamp); 97 cl_ulong getDelta(cl_ulong startTime, 98 cl_ulong endTime); 99 void setCPUProfilingPath(bool isCPUPath) { this->profilingCpuPath = isCPUPath; } 100 bool isCPUProfilingPath() const { 101 return profilingCpuPath; 102 } 103 104 cl_int getEventProfilingInfo(cl_profiling_info paramName, 105 size_t paramValueSize, 106 void *paramValue, 107 size_t *paramValueSizeRet); 108 109 bool isProfilingEnabled() const { return profilingEnabled; } 110 111 void setProfilingEnabled(bool profilingEnabled) { this->profilingEnabled = profilingEnabled; } 112 113 TagNodeBase *getHwTimeStampNode(); 114 115 void addTimestampPacketNodes(const TimestampPacketContainer &inputTimestampPacketContainer); 116 TimestampPacketContainer *getTimestampPacketNodes() const; 117 118 bool isPerfCountersEnabled() const { 119 return perfCountersEnabled; 120 } 121 122 void setPerfCountersEnabled(bool perfCountersEnabled) { 123 this->perfCountersEnabled = perfCountersEnabled; 124 } 125 126 TagNodeBase *getHwPerfCounterNode(); 127 128 std::unique_ptr<FlushStampTracker> flushStamp; 129 std::atomic<uint32_t> taskLevel; 130 131 void addChild(Event &e); 132 133 virtual bool setStatus(cl_int status); 134 135 static cl_int waitForEvents(cl_uint numEvents, 136 const cl_event *eventList); 137 138 void setCommand(std::unique_ptr<Command> newCmd) { 139 UNRECOVERABLE_IF(cmdToSubmit.load()); 140 cmdToSubmit.exchange(newCmd.release()); 141 eventWithoutCommand = false; 142 } 143 Command *peekCommand() { 144 return cmdToSubmit; 145 } 146 147 IFNodeRef<Event> *peekChildEvents() { 148 return childEventsToNotify.peekHead(); 149 } 150 151 bool peekHasChildEvents() { 152 return (peekChildEvents() != nullptr); 153 } 154 155 bool peekHasCallbacks(ECallbackTarget target) { 156 if (target >= ECallbackTarget::MAX) { 157 DEBUG_BREAK_IF(true); 158 return false; 159 } 160 return (callbacks[(uint32_t)target].peekHead() != nullptr); 161 } 162 163 bool peekHasCallbacks() { 164 for (uint32_t i = 0; i < (uint32_t)ECallbackTarget::MAX; ++i) { 165 if (peekHasCallbacks((ECallbackTarget)i)) { 166 return true; 167 } 168 } 169 return false; 170 } 171 172 // return the number of events that are blocking this event 173 uint32_t peekNumEventsBlockingThis() const { 174 return parentCount; 175 } 176 177 // returns true if event is completed (in terms of definition provided by OCL spec) 178 // Note from OLC spec : 179 // "A command is considered complete if its execution status 180 // is CL_COMPLETE or a negative value." 181 182 bool isStatusCompleted(const int32_t executionStatusSnapshot) { 183 return executionStatusSnapshot <= CL_COMPLETE; 184 } 185 186 bool updateStatusAndCheckCompletion(); 187 188 // Note from OCL spec : 189 // "A negative integer value causes all enqueued commands that wait on this user event 190 // to be terminated." 191 bool isStatusCompletedByTermination(const int32_t executionStatusSnapshot) const { 192 return executionStatusSnapshot < 0; 193 } 194 195 bool peekIsSubmitted(const int32_t executionStatusSnapshot) const { 196 return executionStatusSnapshot == CL_SUBMITTED; 197 } 198 199 bool peekIsCmdSubmitted() { 200 return submittedCmd != nullptr; 201 } 202 203 //commands blocked by user event depencies 204 bool isReadyForSubmission(); 205 206 // adds a callback (execution state change listener) to this event's list of callbacks 207 void addCallback(Callback::ClbFuncT fn, cl_int type, void *data); 208 209 //returns true on success 210 //if(blocking==false), will return with false instead of blocking while waiting for completion 211 virtual bool wait(bool blocking, bool useQuickKmdSleep); 212 213 bool isUserEvent() const { 214 return (CL_COMMAND_USER == cmdType); 215 } 216 217 bool isEventWithoutCommand() const { 218 return eventWithoutCommand; 219 } 220 221 Context *getContext() { 222 return ctx; 223 } 224 225 CommandQueue *getCommandQueue() { 226 return cmdQueue; 227 } 228 229 const CommandQueue *getCommandQueue() const { 230 return cmdQueue; 231 } 232 233 cl_command_type getCommandType() { 234 return cmdType; 235 } 236 237 virtual uint32_t getTaskLevel(); 238 239 cl_int peekExecutionStatus() const { 240 return executionStatus; 241 } 242 243 cl_int updateEventAndReturnCurrentStatus() { 244 updateExecutionStatus(); 245 return executionStatus; 246 } 247 248 bool peekIsBlocked() const { 249 return (peekNumEventsBlockingThis() > 0); 250 } 251 252 virtual void unblockEventBy(Event &event, uint32_t taskLevel, int32_t transitionStatus); 253 254 void updateTaskCount(uint32_t gpgpuTaskCount, uint32_t bcsTaskCount) { 255 if (gpgpuTaskCount == CompletionStamp::notReady) { 256 DEBUG_BREAK_IF(true); 257 return; 258 } 259 260 this->bcsState.taskCount = bcsTaskCount; 261 uint32_t prevTaskCount = this->taskCount.exchange(gpgpuTaskCount); 262 if ((prevTaskCount != CompletionStamp::notReady) && (prevTaskCount > gpgpuTaskCount)) { 263 this->taskCount = prevTaskCount; 264 DEBUG_BREAK_IF(true); 265 } 266 } 267 268 bool isCurrentCmdQVirtualEvent() { 269 return currentCmdQVirtualEvent; 270 } 271 272 void setCurrentCmdQVirtualEvent(bool isCurrentVirtualEvent) { 273 currentCmdQVirtualEvent = isCurrentVirtualEvent; 274 } 275 276 virtual void updateExecutionStatus(); 277 void tryFlushEvent(); 278 279 uint32_t peekTaskCount() const { 280 return this->taskCount; 281 } 282 283 void setQueueTimeStamp(TimeStampData *queueTimeStamp) { 284 this->queueTimeStamp = *queueTimeStamp; 285 }; 286 287 void setQueueTimeStamp(); 288 void setSubmitTimeStamp(); 289 290 void setStartTimeStamp(); 291 void setEndTimeStamp(); 292 293 void setCmdType(uint32_t cmdType) { 294 this->cmdType = cmdType; 295 } 296 297 std::vector<Event *> &getParentEvents() { return this->parentEvents; } 298 299 virtual bool isExternallySynchronized() const { 300 return false; 301 } 302 303 static bool checkUserEventDependencies(cl_uint numEventsInWaitList, const cl_event *eventWaitList); 304 305 static void getBoundaryTimestampValues(TimestampPacketContainer *timestampContainer, uint64_t &globalStartTS, uint64_t &globalEndTS); 306 307 protected: 308 Event(Context *ctx, CommandQueue *cmdQueue, cl_command_type cmdType, 309 uint32_t taskLevel, uint32_t taskCount); 310 311 ECallbackTarget translateToCallbackTarget(cl_int execStatus) { 312 switch (execStatus) { 313 default: { 314 DEBUG_BREAK_IF(true); 315 return ECallbackTarget::Invalid; 316 } 317 318 case CL_QUEUED: 319 return ECallbackTarget::Queued; 320 case CL_SUBMITTED: 321 return ECallbackTarget::Submitted; 322 case CL_RUNNING: 323 return ECallbackTarget::Running; 324 case CL_COMPLETE: 325 return ECallbackTarget::Completed; 326 } 327 } 328 329 void calculateSubmitTimestampData(); 330 uint64_t getTimeInNSFromTimestampData(const TimeStampData ×tamp) const; 331 bool calcProfilingData(); 332 MOCKABLE_VIRTUAL void calculateProfilingDataInternal(uint64_t contextStartTS, uint64_t contextEndTS, uint64_t *contextCompleteTS, uint64_t globalStartTS); 333 MOCKABLE_VIRTUAL void synchronizeTaskCount() { 334 while (this->taskCount == CompletionStamp::notReady) 335 ; 336 }; 337 338 // executes all callbacks associated with this event 339 void executeCallbacks(int32_t executionStatus); 340 341 // transitions event to new execution state 342 // guarantees that newStatus <= oldStatus 343 void transitionExecutionStatus(int32_t newExecutionStatus) const; 344 345 //vector storing events that needs to be notified when this event is ready to go 346 IFRefList<Event, true, true> childEventsToNotify; 347 void unblockEventsBlockedByThis(int32_t transitionStatus); 348 void submitCommand(bool abortBlockedTasks); 349 350 bool currentCmdQVirtualEvent; 351 std::atomic<Command *> cmdToSubmit; 352 std::atomic<Command *> submittedCmd; 353 bool eventWithoutCommand = true; 354 355 Context *ctx; 356 CommandQueue *cmdQueue; 357 cl_command_type cmdType; 358 359 // callbacks to be executed when this event changes its execution state 360 IFList<Callback, true, true> callbacks[(uint32_t)ECallbackTarget::MAX]; 361 362 // can be accessed only with transitionExecutionState 363 // this is to ensure state consitency event when doning lock-free multithreading 364 // e.g. CL_COMPLETE -> CL_SUBMITTED or CL_SUBMITTED -> CL_QUEUED becomes forbiden 365 mutable std::atomic<int32_t> executionStatus; 366 // Timestamps 367 bool profilingEnabled; 368 bool profilingCpuPath; 369 bool dataCalculated; 370 TimeStampData queueTimeStamp; 371 TimeStampData submitTimeStamp; 372 uint64_t startTimeStamp; 373 uint64_t endTimeStamp; 374 uint64_t completeTimeStamp; 375 CopyEngineState bcsState{}; 376 bool perfCountersEnabled; 377 TagNodeBase *timeStampNode = nullptr; 378 TagNodeBase *perfCounterNode = nullptr; 379 std::unique_ptr<TimestampPacketContainer> timestampPacketContainer; 380 //number of events this event depends on 381 std::atomic<int> parentCount; 382 //event parents 383 std::vector<Event *> parentEvents; 384 385 private: 386 // can be accessed only with updateTaskCount 387 std::atomic<uint32_t> taskCount; 388 }; 389 } // namespace NEO 390