/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
//!
//! \file cm_queue_rt.h
//! \brief Contains CmQueueRT declarations.
//!
26 27 #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_ 28 #define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_ 29 30 #include "cm_queue.h" 31 32 #include <queue> 33 34 #include "cm_array.h" 35 #include "cm_csync.h" 36 #include "cm_hal.h" 37 #include "cm_log.h" 38 39 namespace CMRT_UMD 40 { 41 class CmDeviceRT; 42 class CmKernel; 43 class CmKernelRT; 44 class CmTaskInternal; 45 class CmEventRT; 46 class CmThreadSpaceRT; 47 class CmThreadGroupSpace; 48 class CmVebox; 49 class CmBuffer; 50 class CmSurface2D; 51 class CmSurface2DRT; 52 53 struct CM_GPUCOPY_KERNEL 54 { 55 CmKernel *kernel; 56 CM_GPUCOPY_KERNEL_ID kernelID; 57 bool locked; 58 }; 59 60 class ThreadSafeQueue 61 { 62 public: Push(CmTaskInternal * element)63 bool Push(CmTaskInternal *element) 64 { 65 mCriticalSection.Acquire(); 66 mQueue.push(element); 67 mCriticalSection.Release(); 68 return true; 69 } 70 Pop()71 CmTaskInternal *Pop() 72 { 73 CmTaskInternal *element = nullptr; 74 mCriticalSection.Acquire(); 75 if (mQueue.empty()) 76 { 77 CM_ASSERT(0); 78 } 79 else 80 { 81 element = mQueue.front(); 82 mQueue.pop(); 83 } 84 mCriticalSection.Release(); 85 return element; 86 } 87 Top()88 CmTaskInternal *Top() 89 { 90 CmTaskInternal *element = nullptr; 91 if (mQueue.empty()) 92 { 93 CM_ASSERT(0); 94 } 95 else 96 { 97 element = mQueue.front(); 98 } 99 return element; 100 } 101 IsEmpty()102 bool IsEmpty() { return mQueue.empty(); } 103 GetCount()104 int GetCount() { return mQueue.size(); } 105 106 private: 107 std::queue<CmTaskInternal*> mQueue; 108 CSync mCriticalSection; 109 }; 110 111 //! 112 //! \brief Class CmQueueRT definitions. 113 //! 
114 class CmQueueRT: public CmQueue 115 { 116 public: 117 static int32_t Create(CmDeviceRT *device, 118 CmQueueRT *&queue, 119 CM_QUEUE_CREATE_OPTION queueCreateOption); 120 121 static int32_t Destroy(CmQueueRT *&queue); 122 123 CM_RT_API int32_t Enqueue(CmTask *task, 124 CmEvent *&event, 125 const CmThreadSpace *threadSpace = nullptr); 126 127 CM_RT_API int32_t DestroyEvent(CmEvent *&event); 128 129 CM_RT_API int32_t 130 EnqueueWithGroup(CmTask *task, 131 CmEvent *&event, 132 const CmThreadGroupSpace *threadGroupSpace = nullptr); 133 134 CM_RT_API int32_t EnqueueVebox(CmVebox *vebox, CmEvent *&event); 135 136 CM_RT_API int32_t EnqueueWithHints(CmTask *task, 137 CmEvent *&event, 138 uint32_t hints = 0); 139 140 CM_RT_API int32_t EnqueueCopyCPUToGPU(CmSurface2D *surface, 141 const unsigned char *sysMem, 142 CmEvent *&event); 143 144 CM_RT_API int32_t EnqueueCopyGPUToCPU(CmSurface2D *surface, 145 unsigned char *sysMem, 146 CmEvent *&event); 147 148 CM_RT_API int32_t EnqueueInitSurface2D(CmSurface2D *surf2D, 149 const uint32_t initValue, 150 CmEvent *&event); 151 152 CM_RT_API int32_t EnqueueCopyGPUToGPU(CmSurface2D *outputSurface, 153 CmSurface2D *inputSurface, 154 uint32_t option, 155 CmEvent *&event); 156 157 CM_RT_API int32_t EnqueueCopyCPUToCPU(unsigned char *dstSysMem, 158 unsigned char *srcSysMem, 159 uint32_t size, 160 uint32_t option, 161 CmEvent *&event); 162 163 CM_RT_API int32_t EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface, 164 const unsigned char *sysMem, 165 const uint32_t widthStride, 166 const uint32_t heightStride, 167 const uint32_t option, 168 CmEvent *&event); 169 170 CM_RT_API int32_t EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface, 171 unsigned char *sysMem, 172 const uint32_t widthStride, 173 const uint32_t heightStride, 174 const uint32_t option, 175 CmEvent *&event); 176 177 CM_RT_API int32_t EnqueueFast(CmTask *task, 178 CmEvent *&event, 179 const CmThreadSpace *threadSpace = nullptr); 180 181 CM_RT_API int32_t 
DestroyEventFast(CmEvent *&event); 182 183 CM_RT_API int32_t EnqueueWithGroupFast(CmTask *task, 184 CmEvent *&event, 185 const CmThreadGroupSpace *threadGroupSpace = nullptr); 186 187 int32_t EnqueueCopyInternal_1Plane(CmSurface2DRT *surface, 188 unsigned char *sysMem, 189 CM_SURFACE_FORMAT format, 190 const uint32_t widthInPixel, 191 const uint32_t widthStride, 192 const uint32_t heightInRow, 193 const uint32_t heightStride, 194 const uint32_t sizePerPixel, 195 CM_GPUCOPY_DIRECTION direction, 196 const uint32_t option, 197 CmEvent *&event); 198 199 int32_t EnqueueCopyInternal_2Planes(CmSurface2DRT *surface, 200 unsigned char *sysMem, 201 CM_SURFACE_FORMAT format, 202 const uint32_t widthInPixel, 203 const uint32_t widthStride, 204 const uint32_t heightInRow, 205 const uint32_t heightStride, 206 const uint32_t sizePerPixel, 207 CM_GPUCOPY_DIRECTION direction, 208 const uint32_t option, 209 CmEvent *&event); 210 211 int32_t EnqueueCopyInternal(CmSurface2DRT *surface, 212 unsigned char *sysMem, 213 const uint32_t widthStride, 214 const uint32_t heightStride, 215 CM_GPUCOPY_DIRECTION direction, 216 const uint32_t option, 217 CmEvent *&event); 218 219 int32_t EnqueueUnalignedCopyInternal(CmSurface2DRT *surface, 220 unsigned char *sysMem, 221 const uint32_t widthStride, 222 const uint32_t heightStride, 223 CM_GPUCOPY_DIRECTION direction); 224 225 int32_t FlushTaskWithoutSync(bool flushBlocked = false); 226 227 int32_t GetTaskCount(uint32_t &numTasks); 228 229 int32_t TouchFlushedTasks(); 230 231 int32_t GetTaskHasThreadArg(CmKernelRT *kernelArray[], 232 uint32_t numKernels, 233 bool &threadArgExists); 234 int32_t CleanQueue(); 235 236 CM_QUEUE_CREATE_OPTION &GetQueueOption(); 237 238 int32_t GetOSSyncEventHandle(void *& hOSSyncEvent); 239 GetFastTrackerIndex()240 uint32_t GetFastTrackerIndex() { return m_fastTrackerIndex; } 241 StreamIndex()242 uint32_t StreamIndex() const { return m_streamIndex; } 243 244 int32_t EnqueueBufferCopy( CmBuffer* buffer, 245 size_t offset, 
246 const unsigned char* sysMem, 247 uint64_t sysMemSize, 248 CM_GPUCOPY_DIRECTION dir, 249 CmEvent* wait_event, 250 CmEvent*& event, 251 uint32_t option); 252 253 protected: 254 CmQueueRT(CmDeviceRT *device, CM_QUEUE_CREATE_OPTION queueCreateOption); 255 256 ~CmQueueRT(); 257 258 int32_t Initialize(); 259 260 int32_t 261 Enqueue_RT(CmKernelRT *kernelArray[], 262 const uint32_t kernelCount, 263 const uint32_t totalThreadCount, 264 CmEventRT *&event, 265 const CmThreadSpaceRT *threadSpace = nullptr, 266 const uint64_t syncBitmap = 0, 267 PCM_POWER_OPTION powerOption = nullptr, 268 const uint64_t conditionalEndBitmap = 0, 269 PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo = nullptr, 270 CM_TASK_CONFIG *taskConfig = nullptr); 271 272 int32_t Enqueue_RT(CmKernelRT *kernelArray[], 273 const uint32_t kernelCount, 274 const uint32_t totalThreadCount, 275 CmEventRT *&event, 276 const CmThreadGroupSpace *threadGroupSpace = nullptr, 277 const uint64_t syncBitmap = 0, 278 PCM_POWER_OPTION powerOption = nullptr, 279 const uint64_t conditionalEndBitmap = 0, 280 PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo = nullptr, 281 CM_TASK_CONFIG *taskConfig = nullptr, 282 const CM_EXECUTION_CONFIG* krnExecCfg = nullptr); 283 284 int32_t Enqueue_RT(CmKernelRT *kernelArray[], 285 CmEventRT *&event, 286 uint32_t numTaskGenerated, 287 bool isLastTask, 288 uint32_t hints = 0, 289 PCM_POWER_OPTION powerOption = nullptr); 290 291 int32_t QueryFlushedTasks(); 292 293 //New sub functions for different task flush 294 int32_t FlushGeneralTask(CmTaskInternal *task); 295 296 int32_t FlushGroupTask(CmTaskInternal *task); 297 298 int32_t FlushVeboxTask(CmTaskInternal *task); 299 300 int32_t FlushEnqueueWithHintsTask(CmTaskInternal *task); 301 302 void PopTaskFromFlushedQueue(); 303 304 int32_t CreateEvent(CmTaskInternal *task, 305 bool isVisible, 306 int32_t &taskDriverId, 307 CmEventRT *&event); 308 309 int32_t AddGPUCopyKernel(CM_GPUCOPY_KERNEL* &kernelParam); 310 311 int32_t 
GetGPUCopyKrnID(uint32_t widthInByte, 312 uint32_t height, 313 CM_SURFACE_FORMAT format, 314 CM_GPUCOPY_DIRECTION copyDirection, 315 CM_GPUCOPY_KERNEL_ID &kernelID); 316 317 int32_t AllocateGPUCopyKernel(uint32_t widthInByte, 318 uint32_t height, 319 CM_SURFACE_FORMAT format, 320 CM_GPUCOPY_DIRECTION copyDirection, 321 CmKernel* &kernel); 322 323 int32_t CreateGPUCopyKernel(uint32_t widthInByte, 324 uint32_t height, 325 CM_SURFACE_FORMAT format, 326 CM_GPUCOPY_DIRECTION copyDirection, 327 CM_GPUCOPY_KERNEL* &gpuCopyKernelParam); 328 329 int32_t SearchGPUCopyKernel(uint32_t widthInByte, 330 uint32_t height, 331 CM_SURFACE_FORMAT format, 332 CM_GPUCOPY_DIRECTION copyDirection, 333 CM_GPUCOPY_KERNEL* &kernelParam); 334 335 int32_t RegisterSyncEvent(); 336 337 338 CmDeviceRT *m_device; 339 ThreadSafeQueue m_enqueuedTasks; 340 ThreadSafeQueue m_flushedTasks; 341 342 CmDynamicArray m_eventArray; 343 CSync m_criticalSectionEvent; // Protect m_eventArray 344 CSync m_criticalSectionHalExecute; // Protect execution in HALCm, i.e HalCm_Execute 345 CSync m_criticalSectionFlushedTask; // Protect QueryFlushedTask 346 CSync m_criticalSectionTaskInternal; 347 348 uint32_t m_eventCount; 349 uint64_t m_CPUperformanceFrequency; 350 351 CmDynamicArray m_copyKernelParamArray; 352 uint32_t m_copyKernelParamArrayCount; 353 354 CSync m_criticalSectionGPUCopyKrn; 355 356 CM_HAL_MAX_VALUES *m_halMaxValues; 357 CM_QUEUE_CREATE_OPTION m_queueOption; 358 359 bool m_usingVirtualEngine; 360 MOS_VIRTUALENGINE_HINT_PARAMS m_mosVeHintParams; 361 362 void *m_osSyncEvent; //KMD Notification 363 364 uint32_t m_trackerIndex; 365 uint32_t m_fastTrackerIndex; 366 367 private: 368 static const uint32_t INVALID_SYNC_BUFFER_HANDLE = 0xDEADBEEF; 369 370 //-------------------------------------------------------------------------------- 371 // Create a GPU context for this object. 
372 //-------------------------------------------------------------------------------- 373 MOS_STATUS CreateGpuContext(CM_HAL_STATE *halState, 374 MOS_GPU_CONTEXT gpuContextName, 375 MOS_GPU_NODE gpuNode, 376 MOS_GPUCTX_CREATOPTIONS *createOptions); 377 378 //-------------------------------------------------------------------------------- 379 // Destroy compute GPU context 380 //-------------------------------------------------------------------------------- 381 MOS_STATUS DestroyComputeGpuContext(); 382 383 //-------------------------------------------------------------------------------- 384 // Calls CM HAL API to submit a group task to command buffer. 385 //-------------------------------------------------------------------------------- 386 MOS_STATUS ExecuteGroupTask(CM_HAL_STATE *halState, 387 CM_HAL_EXEC_TASK_GROUP_PARAM *taskParam, 388 MOS_GPU_CONTEXT gpuContextName); 389 390 //-------------------------------------------------------------------------------- 391 // Calls CM HAL API to submit a general task to command buffer. 392 //-------------------------------------------------------------------------------- 393 MOS_STATUS ExecuteGeneralTask(CM_HAL_STATE *halState, 394 CM_HAL_EXEC_TASK_PARAM *taskParam, 395 MOS_GPU_CONTEXT gpuContextName); 396 397 //-------------------------------------------------------------------------------- 398 // Creates a buffer to synchronize all tasks in this queue. 399 // It's useful only on certain operating systems. 400 //-------------------------------------------------------------------------------- 401 MOS_STATUS CreateSyncBuffer(CM_HAL_STATE *halState); 402 403 //-------------------------------------------------------------------------------- 404 // Selects sync buffer in this queue so CM HAL can add it to the command buffer. 405 // It's useful only on certain operating systems. 
406 //-------------------------------------------------------------------------------- 407 MOS_STATUS SelectSyncBuffer(CM_HAL_STATE *halState); 408 409 //-------------------------------------------------------------------------------- 410 // Releases sync buffer in this queue if it's created. 411 //-------------------------------------------------------------------------------- 412 MOS_STATUS ReleaseSyncBuffer(CM_HAL_STATE *halState); 413 414 #if CM_LOG_ON 415 CM_HAL_STATE* GetHalState(); 416 #endif // #if CM_LOG_ON 417 418 uint32_t m_streamIndex; 419 420 GPU_CONTEXT_HANDLE m_gpuContextHandle; 421 422 // Handle of buffer resource for synchronizing tasks in this queue. 423 uint32_t m_syncBufferHandle; 424 425 426 CmQueueRT(const CmQueueRT& other); 427 CmQueueRT& operator=(const CmQueueRT& other); 428 }; 429 }; //namespace 430 431 #endif // #ifnfef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_ 432