1 /*
2 * Copyright (c) 2017, Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 */
22 //!
23 //! \file      cm_queue_rt.h
24 //! \brief     Contains CmQueueRT declarations.
25 //!
26 
27 #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_
28 #define MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_
29 
30 #include "cm_queue.h"
31 
32 #include <queue>
33 
34 #include "cm_array.h"
35 #include "cm_csync.h"
36 #include "cm_hal.h"
37 #include "cm_log.h"
38 
39 namespace CMRT_UMD
40 {
// Forward declarations of CM runtime types that this header only refers to
// through pointers or references (listed alphabetically).
class CmBuffer;
class CmDeviceRT;
class CmEventRT;
class CmKernel;
class CmKernelRT;
class CmSurface2D;
class CmSurface2DRT;
class CmTaskInternal;
class CmThreadGroupSpace;
class CmThreadSpaceRT;
class CmVebox;
52 
53 struct CM_GPUCOPY_KERNEL
54 {
55     CmKernel *kernel;
56     CM_GPUCOPY_KERNEL_ID kernelID;
57     bool locked;
58 };
59 
60 class ThreadSafeQueue
61 {
62 public:
Push(CmTaskInternal * element)63     bool Push(CmTaskInternal *element)
64     {
65         mCriticalSection.Acquire();
66         mQueue.push(element);
67         mCriticalSection.Release();
68         return true;
69     }
70 
Pop()71     CmTaskInternal *Pop()
72     {
73         CmTaskInternal *element = nullptr;
74         mCriticalSection.Acquire();
75         if (mQueue.empty())
76         {
77             CM_ASSERT(0);
78         }
79         else
80         {
81             element = mQueue.front();
82             mQueue.pop();
83         }
84         mCriticalSection.Release();
85         return element;
86     }
87 
Top()88     CmTaskInternal *Top()
89     {
90         CmTaskInternal *element = nullptr;
91         if (mQueue.empty())
92         {
93             CM_ASSERT(0);
94         }
95         else
96         {
97             element = mQueue.front();
98         }
99         return element;
100     }
101 
IsEmpty()102     bool IsEmpty() { return mQueue.empty(); }
103 
GetCount()104     int GetCount() { return mQueue.size(); }
105 
106 private:
107     std::queue<CmTaskInternal*> mQueue;
108     CSync mCriticalSection;
109 };
110 
111 //!
112 //! \brief    Class CmQueueRT definitions.
113 //!
114 class CmQueueRT: public CmQueue
115 {
116 public:
117     static int32_t Create(CmDeviceRT *device,
118                           CmQueueRT *&queue,
119                           CM_QUEUE_CREATE_OPTION queueCreateOption);
120 
121     static int32_t Destroy(CmQueueRT *&queue);
122 
123     CM_RT_API int32_t Enqueue(CmTask *task,
124                               CmEvent *&event,
125                               const CmThreadSpace *threadSpace = nullptr);
126 
127     CM_RT_API int32_t DestroyEvent(CmEvent *&event);
128 
129     CM_RT_API int32_t
130     EnqueueWithGroup(CmTask *task,
131                      CmEvent *&event,
132                      const CmThreadGroupSpace *threadGroupSpace = nullptr);
133 
134     CM_RT_API int32_t EnqueueVebox(CmVebox *vebox, CmEvent *&event);
135 
136     CM_RT_API int32_t EnqueueWithHints(CmTask *task,
137                                        CmEvent *&event,
138                                        uint32_t hints = 0);
139 
140     CM_RT_API int32_t EnqueueCopyCPUToGPU(CmSurface2D *surface,
141                                           const unsigned char *sysMem,
142                                           CmEvent *&event);
143 
144     CM_RT_API int32_t EnqueueCopyGPUToCPU(CmSurface2D *surface,
145                                           unsigned char *sysMem,
146                                           CmEvent *&event);
147 
148     CM_RT_API int32_t EnqueueInitSurface2D(CmSurface2D *surf2D,
149                                            const uint32_t initValue,
150                                            CmEvent *&event);
151 
152     CM_RT_API int32_t EnqueueCopyGPUToGPU(CmSurface2D *outputSurface,
153                                           CmSurface2D *inputSurface,
154                                           uint32_t option,
155                                           CmEvent *&event);
156 
157     CM_RT_API int32_t EnqueueCopyCPUToCPU(unsigned char *dstSysMem,
158                                           unsigned char *srcSysMem,
159                                           uint32_t size,
160                                           uint32_t option,
161                                           CmEvent *&event);
162 
163     CM_RT_API int32_t EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface,
164                                                     const unsigned char *sysMem,
165                                                     const uint32_t widthStride,
166                                                     const uint32_t heightStride,
167                                                     const uint32_t option,
168                                                     CmEvent *&event);
169 
170     CM_RT_API int32_t EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface,
171                                                     unsigned char *sysMem,
172                                                     const uint32_t widthStride,
173                                                     const uint32_t heightStride,
174                                                     const uint32_t option,
175                                                     CmEvent *&event);
176 
177     CM_RT_API int32_t EnqueueFast(CmTask *task,
178                               CmEvent *&event,
179                               const CmThreadSpace *threadSpace = nullptr);
180 
181     CM_RT_API int32_t DestroyEventFast(CmEvent *&event);
182 
183     CM_RT_API int32_t EnqueueWithGroupFast(CmTask *task,
184                                       CmEvent *&event,
185                                       const CmThreadGroupSpace *threadGroupSpace = nullptr);
186 
187     int32_t EnqueueCopyInternal_1Plane(CmSurface2DRT *surface,
188                                        unsigned char *sysMem,
189                                        CM_SURFACE_FORMAT format,
190                                        const uint32_t widthInPixel,
191                                        const uint32_t widthStride,
192                                        const uint32_t heightInRow,
193                                        const uint32_t heightStride,
194                                        const uint32_t sizePerPixel,
195                                        CM_GPUCOPY_DIRECTION direction,
196                                        const uint32_t option,
197                                        CmEvent *&event);
198 
199     int32_t EnqueueCopyInternal_2Planes(CmSurface2DRT *surface,
200                                         unsigned char *sysMem,
201                                         CM_SURFACE_FORMAT format,
202                                         const uint32_t widthInPixel,
203                                         const uint32_t widthStride,
204                                         const uint32_t heightInRow,
205                                         const uint32_t heightStride,
206                                         const uint32_t sizePerPixel,
207                                         CM_GPUCOPY_DIRECTION direction,
208                                         const uint32_t option,
209                                         CmEvent *&event);
210 
211     int32_t EnqueueCopyInternal(CmSurface2DRT *surface,
212                                 unsigned char *sysMem,
213                                 const uint32_t widthStride,
214                                 const uint32_t heightStride,
215                                 CM_GPUCOPY_DIRECTION direction,
216                                 const uint32_t option,
217                                 CmEvent *&event);
218 
219     int32_t EnqueueUnalignedCopyInternal(CmSurface2DRT *surface,
220                                          unsigned char *sysMem,
221                                          const uint32_t widthStride,
222                                          const uint32_t heightStride,
223                                          CM_GPUCOPY_DIRECTION direction);
224 
225     int32_t FlushTaskWithoutSync(bool flushBlocked = false);
226 
227     int32_t GetTaskCount(uint32_t &numTasks);
228 
229     int32_t TouchFlushedTasks();
230 
231     int32_t GetTaskHasThreadArg(CmKernelRT *kernelArray[],
232                                 uint32_t numKernels,
233                                 bool &threadArgExists);
234     int32_t CleanQueue();
235 
236     CM_QUEUE_CREATE_OPTION &GetQueueOption();
237 
238     int32_t GetOSSyncEventHandle(void *& hOSSyncEvent);
239 
GetFastTrackerIndex()240     uint32_t GetFastTrackerIndex() { return m_fastTrackerIndex; }
241 
StreamIndex()242     uint32_t StreamIndex() const { return m_streamIndex; }
243 
244     int32_t EnqueueBufferCopy(  CmBuffer* buffer,
245                                 size_t   offset,
246                                 const unsigned char* sysMem,
247                                 uint64_t sysMemSize,
248                                 CM_GPUCOPY_DIRECTION dir,
249                                 CmEvent* wait_event,
250                                 CmEvent*& event,
251                                 uint32_t option);
252 
253 protected:
254     CmQueueRT(CmDeviceRT *device, CM_QUEUE_CREATE_OPTION queueCreateOption);
255 
256     ~CmQueueRT();
257 
258     int32_t Initialize();
259 
260     int32_t
261     Enqueue_RT(CmKernelRT *kernelArray[],
262                const uint32_t kernelCount,
263                const uint32_t totalThreadCount,
264                CmEventRT *&event,
265                const CmThreadSpaceRT *threadSpace = nullptr,
266                const uint64_t syncBitmap = 0,
267                PCM_POWER_OPTION powerOption = nullptr,
268                const uint64_t conditionalEndBitmap = 0,
269                PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo = nullptr,
270                CM_TASK_CONFIG *taskConfig = nullptr);
271 
272     int32_t Enqueue_RT(CmKernelRT *kernelArray[],
273                        const uint32_t kernelCount,
274                        const uint32_t totalThreadCount,
275                        CmEventRT *&event,
276                        const CmThreadGroupSpace *threadGroupSpace = nullptr,
277                        const uint64_t syncBitmap = 0,
278                        PCM_POWER_OPTION powerOption = nullptr,
279                        const uint64_t conditionalEndBitmap = 0,
280                        PCM_HAL_CONDITIONAL_BB_END_INFO conditionalEndInfo = nullptr,
281                        CM_TASK_CONFIG *taskConfig = nullptr,
282                        const CM_EXECUTION_CONFIG* krnExecCfg = nullptr);
283 
284     int32_t Enqueue_RT(CmKernelRT *kernelArray[],
285                        CmEventRT *&event,
286                        uint32_t numTaskGenerated,
287                        bool isLastTask,
288                        uint32_t hints = 0,
289                        PCM_POWER_OPTION powerOption = nullptr);
290 
291     int32_t QueryFlushedTasks();
292 
293     //New sub functions for different task flush
294     int32_t FlushGeneralTask(CmTaskInternal *task);
295 
296     int32_t FlushGroupTask(CmTaskInternal *task);
297 
298     int32_t FlushVeboxTask(CmTaskInternal *task);
299 
300     int32_t FlushEnqueueWithHintsTask(CmTaskInternal *task);
301 
302     void PopTaskFromFlushedQueue();
303 
304     int32_t CreateEvent(CmTaskInternal *task,
305                         bool isVisible,
306                         int32_t &taskDriverId,
307                         CmEventRT *&event);
308 
309     int32_t AddGPUCopyKernel(CM_GPUCOPY_KERNEL* &kernelParam);
310 
311     int32_t GetGPUCopyKrnID(uint32_t widthInByte,
312                             uint32_t height,
313                             CM_SURFACE_FORMAT format,
314                             CM_GPUCOPY_DIRECTION copyDirection,
315                             CM_GPUCOPY_KERNEL_ID &kernelID);
316 
317     int32_t AllocateGPUCopyKernel(uint32_t widthInByte,
318                                   uint32_t height,
319                                   CM_SURFACE_FORMAT format,
320                                   CM_GPUCOPY_DIRECTION copyDirection,
321                                   CmKernel* &kernel);
322 
323     int32_t CreateGPUCopyKernel(uint32_t widthInByte,
324                                 uint32_t height,
325                                 CM_SURFACE_FORMAT format,
326                                 CM_GPUCOPY_DIRECTION copyDirection,
327                                 CM_GPUCOPY_KERNEL* &gpuCopyKernelParam);
328 
329     int32_t SearchGPUCopyKernel(uint32_t widthInByte,
330                                 uint32_t height,
331                                 CM_SURFACE_FORMAT format,
332                                 CM_GPUCOPY_DIRECTION copyDirection,
333                                 CM_GPUCOPY_KERNEL* &kernelParam);
334 
335     int32_t RegisterSyncEvent();
336 
337 
338     CmDeviceRT *m_device;
339     ThreadSafeQueue m_enqueuedTasks;
340     ThreadSafeQueue m_flushedTasks;
341 
342     CmDynamicArray m_eventArray;
343     CSync m_criticalSectionEvent;        // Protect m_eventArray
344     CSync m_criticalSectionHalExecute;   // Protect execution in HALCm, i.e HalCm_Execute
345     CSync m_criticalSectionFlushedTask;  // Protect QueryFlushedTask
346     CSync m_criticalSectionTaskInternal;
347 
348     uint32_t m_eventCount;
349     uint64_t m_CPUperformanceFrequency;
350 
351     CmDynamicArray m_copyKernelParamArray;
352     uint32_t m_copyKernelParamArrayCount;
353 
354     CSync m_criticalSectionGPUCopyKrn;
355 
356     CM_HAL_MAX_VALUES *m_halMaxValues;
357     CM_QUEUE_CREATE_OPTION m_queueOption;
358 
359     bool m_usingVirtualEngine;
360     MOS_VIRTUALENGINE_HINT_PARAMS m_mosVeHintParams;
361 
362     void  *m_osSyncEvent;   //KMD Notification
363 
364     uint32_t m_trackerIndex;
365     uint32_t m_fastTrackerIndex;
366 
367 private:
368     static const uint32_t INVALID_SYNC_BUFFER_HANDLE = 0xDEADBEEF;
369 
370     //--------------------------------------------------------------------------------
371     // Create a GPU context for this object.
372     //--------------------------------------------------------------------------------
373     MOS_STATUS CreateGpuContext(CM_HAL_STATE *halState,
374                                 MOS_GPU_CONTEXT gpuContextName,
375                                 MOS_GPU_NODE gpuNode,
376                                 MOS_GPUCTX_CREATOPTIONS *createOptions);
377 
378     //--------------------------------------------------------------------------------
379     // Destroy compute GPU context
380     //--------------------------------------------------------------------------------
381     MOS_STATUS DestroyComputeGpuContext();
382 
383     //--------------------------------------------------------------------------------
384     // Calls CM HAL API to submit a group task to command buffer.
385     //--------------------------------------------------------------------------------
386     MOS_STATUS ExecuteGroupTask(CM_HAL_STATE *halState,
387                                 CM_HAL_EXEC_TASK_GROUP_PARAM *taskParam,
388                                 MOS_GPU_CONTEXT gpuContextName);
389 
390     //--------------------------------------------------------------------------------
391     // Calls CM HAL API to submit a general task to command buffer.
392     //--------------------------------------------------------------------------------
393     MOS_STATUS ExecuteGeneralTask(CM_HAL_STATE *halState,
394                                   CM_HAL_EXEC_TASK_PARAM *taskParam,
395                                   MOS_GPU_CONTEXT gpuContextName);
396 
397     //--------------------------------------------------------------------------------
398     // Creates a buffer to synchronize all tasks in this queue.
399     // It's useful only on certain operating systems.
400     //--------------------------------------------------------------------------------
401     MOS_STATUS CreateSyncBuffer(CM_HAL_STATE *halState);
402 
403     //--------------------------------------------------------------------------------
404     // Selects sync buffer in this queue so CM HAL can add it to the command buffer.
405     // It's useful only on certain operating systems.
406     //--------------------------------------------------------------------------------
407     MOS_STATUS SelectSyncBuffer(CM_HAL_STATE *halState);
408 
409     //--------------------------------------------------------------------------------
410     // Releases sync buffer in this queue if it's created.
411     //--------------------------------------------------------------------------------
412     MOS_STATUS ReleaseSyncBuffer(CM_HAL_STATE *halState);
413 
414 #if CM_LOG_ON
415     CM_HAL_STATE* GetHalState();
416 #endif  // #if CM_LOG_ON
417 
418     uint32_t m_streamIndex;
419 
420     GPU_CONTEXT_HANDLE m_gpuContextHandle;
421 
422     // Handle of buffer resource for synchronizing tasks in this queue.
423     uint32_t m_syncBufferHandle;
424 
425 
426     CmQueueRT(const CmQueueRT& other);
427     CmQueueRT& operator=(const CmQueueRT& other);
428 };
429 };  //namespace
430 
431 #endif  // #ifndef MEDIADRIVER_AGNOSTIC_COMMON_CM_CMQUEUERT_H_
432