/*
* Copyright (c) 2017, Intel Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef CMRTLIB_AGNOSTIC_SHARE_CM_QUEUE_BASE_H_
#define CMRTLIB_AGNOSTIC_SHARE_CM_QUEUE_BASE_H_

#include "cm_include.h"
#include <cstdint>
#include <cstddef>

class CmTask;
class CmEvent;
class CmThreadSpace;
class CmThreadGroupSpace;
class CmBuffer;
class CmSurface2D;
class CmKernel;
class CmVebox;

//!
//! \brief      Kind of queue requested through CM_QUEUE_CREATE_OPTION::QueueType.
//!
enum CM_QUEUE_TYPE
{
    CM_QUEUE_TYPE_NONE = 0,
    CM_QUEUE_TYPE_RENDER = 1,
    CM_QUEUE_TYPE_COMPUTE = 2
};

//!
//! \brief      SSEU usage hint carried in CM_QUEUE_CREATE_OPTION::SseuUsageHint.
//!
enum CM_QUEUE_SSEU_USAGE_HINT_TYPE
{
    CM_QUEUE_SSEU_USAGE_HINT_DEFAULT = 0,
    CM_QUEUE_SSEU_USAGE_HINT_VME = 1
};

//!
//! \brief      Options used when creating a queue, packed as bit-fields.
//! \details    The declared bit-field widths total 32 bits
//!             (3 + 1 + 3 + 1 + 8 + 3 + 1 + 12). The order and widths of the
//!             fields define the binary layout, so they must not be rearranged.
//!
struct CM_QUEUE_CREATE_OPTION
{
    CM_QUEUE_TYPE QueueType : 3;
    bool RAMode : 1;
    unsigned int Reserved0 : 3;
    bool UserGPUContext : 1; // Is the user-provided GPU Context already created externally
    unsigned int GPUContext : 8; // user-provided GPU Context ordinal
    CM_QUEUE_SSEU_USAGE_HINT_TYPE SseuUsageHint : 3;
    unsigned int Reserved1 : 1;
    unsigned int Reserved2 : 12;
};

//!
//! \brief      Default queue creation option.
//! \details    The initializers follow the member order of CM_QUEUE_CREATE_OPTION:
//!             render queue type, RAMode off, no user-provided GPU context
//!             (ordinal 0), default SSEU usage hint, and all reserved fields zero.
//!
const CM_QUEUE_CREATE_OPTION CM_DEFAULT_QUEUE_CREATE_OPTION = { CM_QUEUE_TYPE_RENDER, false, 0, false, 0, CM_QUEUE_SSEU_USAGE_HINT_DEFAULT, 0, 0 };

//!
//! \brief      CM task queue management.
//! \details    Abstract interface: every member function is pure virtual. The
//!             declaration order of the virtual functions determines the vtable
//!             layout, so new functions must not be inserted between existing ones.
//!
class CmQueue
{
public:
    //!
    //! \brief      Enqueue a task for execution with per-task thread space.
    //! \details    This function enqueues a task represented by the CmTask object.
    //!             The kernels in the CmTask object may be run concurrently.
    //!             Tasks get executed according to the order they get enqueued.
    //!             This is a non-blocking call. It returns immediately without waiting
    //!             for GPU to start or finish execution. A CmEvent is generated each time
    //!             a task is enqueued. The CmEvent can be used to check the status of task.
    //!             The generated event needs to be managed and released by user.
    //!             Since event is not useful in some cases, runtime provides the capability
    //!             to avoid generating event.
    //!             If thread space is valid, the dependency defined by thread space will be honored.
    //! \param      [in] task
    //!             pointer to task to submit
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \param      [in] threadSpace
    //!             pointer to thread space which can define the thread dependency within the task.
    //!             This is a per task thread space. If this task has multiple kernels, each kernel
    //!             will have the thread space of same dimension, same dependency etc. If it is nullptr,
    //!             there is no thread dependency and the maximum thread space width will be assumed
    //!             to calculate the coordinates for each thread. For each kernel, the per kernel thread space
    //!             that is defined by calling CmKernel::AssociateThreadSpace() overwrites the per task thread space.
    //! \retval     CM_SUCCESS if the task is successfully enqueued.
    //! \retval     CM_OUT_OF_HOST_MEMORY if out of host memory
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t Enqueue(CmTask *task,
                                      CmEvent *&event,
                                      const CmThreadSpace *threadSpace = nullptr) = 0;

    //!
    //! \brief      Destroy the CmEvent generated by Enqueue.
    //! \details    Destroy the event object previously generated by Enqueue.
    //!             The CmEvent object can be destroyed even before the corresponding task is flushed or finished.
    //!             If this happens, there is no way the app can get the task status.
    //! \param      [in] event
    //!             reference to pointer to event
    //! \retval     CM_SUCCESS if event destroyed successfully
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t DestroyEvent(CmEvent *&event) = 0;

    //!
    //! \brief      Enqueue the task with thread group space.
    //! \details
    //! \param      [in] task
    //!             pointer to task to submit
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \param      [in] threadGroupSpace
    //!             pointer to thread group space which defines the dimensions of the task.
    //!             threadGroupSpace can not be NULL.
    //!             NOTE(review): the declaration defaults this argument to nullptr even though
    //!             it is documented as required - confirm the intended behavior.
    //! \retval     CM_SUCCESS if the task is successfully enqueued.
    //! \retval     CM_INVALID_ARG_VALUE if input task is not valid
    //! \retval     CM_EXCEED_MAX_KERNEL_PER_ENQUEUE if the task's kernel number exceeds limitation.
    //! \retval     CM_INVALID_THREAD_GROUP_SPACE if the thread group space specification is invalid.
    //! \retval     CM_THREAD_ARG_NOT_ALLOWED if user has per thread arguments
    //!
    CM_RT_API virtual int32_t
    EnqueueWithGroup(CmTask *task,
                     CmEvent *&event,
                     const CmThreadGroupSpace *threadGroupSpace = nullptr) = 0;

    //!
    //! \brief      Enqueues the kernel to copy from system(CPU) memory to video(GPU) memory.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to copy from host
    //!             system memory to video surface.
    //!             This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
    //!             The CmEvent can be used to check the status.
    //!             The host memory sysMem must be 16-Byte aligned and surface's width in bytes must be 16-Byte aligned as well.
    //! \param      [in] surface
    //!             surface as copy destination, surface's width in bytes must be 16-Byte aligned
    //! \param      [in] sysMem
    //!             host memory as copy source, must be 16-Byte aligned
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
    //!             or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
    //! \retval     CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
    //! \retval     CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
    //! \retval     CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueCopyCPUToGPU(CmSurface2D *surface,
                                                  const unsigned char *sysMem,
                                                  CmEvent *&event) = 0;

    //!
    //! \brief      Enqueues the kernel to copy from video(GPU) memory to system(CPU) memory.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory.
    //!             This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
    //!             The CmEvent can be used to check the status or other data regarding the task execution.
    //!             The host memory sysMem must be 16-Byte aligned and surface's width in bytes must be 16-Byte aligned as well.
    //! \param      [in] surface
    //!             surface as copy source, surface's width in bytes must be 16-Byte aligned
    //! \param      [in] sysMem
    //!             host memory as copy destination, must be 16-Byte aligned
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
    //!             or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
    //! \retval     CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
    //! \retval     CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
    //! \retval     CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueCopyGPUToCPU(CmSurface2D *surface,
                                                  unsigned char *sysMem,
                                                  CmEvent *&event) = 0;

    //!
    //! \brief      Enqueues the kernel to initialize a 2D surface.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to initialize a 2D surface.
    //!             This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
    //!             The CmEvent can be used to check the status or other data regarding the task execution.
    //! \param      [in] surface
    //!             surface to initialize
    //! \param      [in] initValue
    //!             value to fill the surface
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueInitSurface2D(CmSurface2D *surface,
                                                   const uint32_t initValue,
                                                   CmEvent *&event) = 0;

    //!
    //! \brief      Enqueue the kernel to copy memory between surfaces.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to copy memory between surfaces.
    //!             This is a non-blocking call. A CmEvent is generated each time a task is enqueued.
    //!             The CmEvent can be used to check the status or other data regarding the task execution.
    //!             The input and output surfaces should have the same width, height and format.
    //! \param      [in] outputSurface
    //!             surface as copy destination (note: the destination is the FIRST argument)
    //! \param      [in] inputSurface
    //!             surface as copy source
    //! \param      [in] option
    //!             If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
    //!             If it is "CM_FASTCOPY_OPTION_BLOCKING", this function does not return until the copy is finished.\n
    //!             If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_INVALID_SURFACES if the input and output surfaces have different
    //!             width, height and format.
    //! \retval     CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueCopyGPUToGPU(CmSurface2D *outputSurface,
                                                  CmSurface2D *inputSurface,
                                                  uint32_t option,
                                                  CmEvent *&event) = 0;

    //!
    //! \brief      Enqueues the kernel to copy memory between host memories.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to copy memory from src to dest memory.
    //!             Both dstSysMem and srcSysMem need to be 16-Byte aligned. The maximum size is determined by system's memory
    //!             and it should be less than CM_MAX_1D_SURF_WIDTH bytes which is 1G bytes now. If the copy size is less than
    //!             1K bytes, the event will not be generated and it is a blocking call.
    //!             For the size larger than 1K bytes, this is a non-blocking call.
    //!             A CmEvent is generated to check the status or other data regarding the task execution.
    //!             To avoid generating event, user can set the event as CM_NO_EVENT and pass it to this function.
    //! \param      [in] dstSysMem
    //!             destination memory, must be 16-Byte aligned
    //! \param      [in] srcSysMem
    //!             source memory, must be 16-Byte aligned
    //! \param      [in] size
    //!             size of memory to copy in bytes
    //! \param      [in] option
    //!             If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
    //!             If it is "CM_FASTCOPY_OPTION_BLOCKING", this function does not return until the copy is finished.\n
    //!             If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_INVALID_SYSMEM if dstSysMem or srcSysMem is not 16-Byte aligned.
    //! \retval     CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueCopyCPUToCPU(unsigned char *dstSysMem,
                                                  unsigned char *srcSysMem,
                                                  uint32_t size,
                                                  uint32_t option,
                                                  CmEvent *&event) = 0;

    //!
    //! \brief      Enqueue the kernel to copy memory from system memory to video memory with width and height stride.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to copy from system memory to a surface.
    //!             Depending on user "option", this is a non-blocking or blocking call.
    //!             A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
    //!             regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
    //!             this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no
    //!             alignment restriction.
    //! \param      [in] surface
    //!             surface as copy destination
    //! \param      [in] sysMem
    //!             system memory as copy source, must be 16-Byte aligned
    //! \param      [in] widthStride
    //!             width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
    //! \param      [in] heightStride
    //!             height stride of memory stored in host memory, in bytes.
    //! \param      [in] option
    //!             If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
    //!             If it is "CM_FASTCOPY_OPTION_BLOCKING", this function does not return until the copy is finished.\n
    //!             If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
    //!             or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
    //! \retval     CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
    //! \retval     CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
    //! \retval     CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueCopyCPUToGPUFullStride(CmSurface2D *surface,
                                                            const unsigned char *sysMem,
                                                            const uint32_t widthStride,
                                                            const uint32_t heightStride,
                                                            const uint32_t option,
                                                            CmEvent *& event) = 0;

    //!
    //! \brief      Enqueue the kernel to copy memory from video memory to system memory with width and height stride.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory.
    //!             Depending on user "option", this is a non-blocking or blocking call.
    //!             A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
    //!             regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
    //!             this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no
    //!             alignment restriction.
    //! \param      [in] surface
    //!             surface as copy source
    //! \param      [in] sysMem
    //!             system memory as copy destination, must be 16-Byte aligned
    //! \param      [in] widthStride
    //!             width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
    //! \param      [in] heightStride
    //!             height stride of memory stored in host memory, in bytes.
    //! \param      [in] option
    //!             If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
    //!             If it is "CM_FASTCOPY_OPTION_BLOCKING", this function does not return until the copy is finished.\n
    //!             If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_INVALID_STRIDE if stride is not 16-Byte aligned or less than surface's width in bytes.
    //! \retval     CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
    //! \retval     CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
    //! \retval     CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueCopyGPUToCPUFullStride(CmSurface2D *surface,
                                                            unsigned char *sysMem,
                                                            const uint32_t widthStride,
                                                            const uint32_t heightStride,
                                                            const uint32_t option,
                                                            CmEvent *& event) = 0;

    //!
    //! \brief      Enqueue the kernel to copy memory from system memory to video memory with width and height stride.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to copy from system memory to a surface.
    //!             Depending on user "option", this is a non-blocking or blocking call.
    //!             A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
    //!             regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
    //!             this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no
    //!             alignment restriction.
    //!             NOTE(review): this text is the same as for EnqueueCopyCPUToGPUFullStride; how the "Dup" variant
    //!             differs is not visible from this header - confirm and document.
    //! \param      [in] surface
    //!             surface as copy destination
    //! \param      [in] sysMem
    //!             system memory as copy source, must be 16-Byte aligned
    //! \param      [in] widthStride
    //!             width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
    //! \param      [in] heightStride
    //!             height stride of memory stored in host memory, in bytes.
    //! \param      [in] option
    //!             If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
    //!             If it is "CM_FASTCOPY_OPTION_BLOCKING", this function does not return until the copy is finished.\n
    //!             If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_INVALID_WIDTH if surface's width in bytes is not 16-Byte aligned
    //!             or more than CM_MAX_GPUCOPY_SURFACE_WIDTH_IN_BYTE.
    //! \retval     CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
    //! \retval     CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
    //! \retval     CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueCopyCPUToGPUFullStrideDup(CmSurface2D *surface,
                                                               const unsigned char *sysMem,
                                                               const uint32_t widthStride,
                                                               const uint32_t heightStride,
                                                               const uint32_t option,
                                                               CmEvent *& event) = 0;

    //!
    //! \brief      Enqueue the kernel to copy memory from video memory to system memory with width and height stride.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to copy from surface to system memory.
    //!             Depending on user "option", this is a non-blocking or blocking call.
    //!             A CmEvent is generated each time a task is enqueued. The CmEvent can be used to check the status or other data
    //!             regarding the task execution. To avoid generating event, user can set the event as CM_NO_EVENT and pass it to
    //!             this function. The host memory sysMem's width stride must be 16-Byte aligned, and height stride has no
    //!             alignment restriction.
    //!             NOTE(review): this text is the same as for EnqueueCopyGPUToCPUFullStride; how the "Dup" variant
    //!             differs is not visible from this header - confirm and document.
    //! \param      [in] surface
    //!             surface as copy source
    //! \param      [in] sysMem
    //!             system memory as copy destination, must be 16-Byte aligned
    //! \param      [in] widthStride
    //!             width stride of memory stored in host memory, in bytes, must be 16-Byte aligned
    //! \param      [in] heightStride
    //!             height stride of memory stored in host memory, in bytes.
    //! \param      [in] option
    //!             If it is "CM_FASTCOPY_OPTION_NONBLOCKING", it returns immediately without waiting for GPU to start or finish.\n
    //!             If it is "CM_FASTCOPY_OPTION_BLOCKING", this function does not return until the copy is finished.\n
    //!             If it is "CM_FASTCOPY_OPTION_DISABLE_TURBO_BOOST", mdf turbo boost is disabled.
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_INVALID_STRIDE if stride is not 16-Byte aligned or less than surface's width in bytes.
    //! \retval     CM_GPUCOPY_INVALID_SYSMEM if sysMem is not 16-Byte aligned.
    //! \retval     CM_GPUCOPY_INVALID_SIZE if surface's height is more than CM_MAX_GPUCOPY_SURFACE_HEIGHT
    //! \retval     CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueCopyGPUToCPUFullStrideDup(CmSurface2D *surface,
                                                               unsigned char *sysMem,
                                                               const uint32_t widthStride,
                                                               const uint32_t heightStride,
                                                               const uint32_t option,
                                                               CmEvent *& event) = 0;

    //!
    //! \brief      Enqueue a task for execution with hints.
    //! \details    This API is designed to saturate the EUs when running a large dependency kernel.
    //!             At least two kernels must exist in the task. The ideal case is at least one large dependency kernel
    //!             running with smaller kernels. The idea is to get the smaller kernels for free during the time it already
    //!             takes to execute the large dependency kernel. Each task can have up to CAP_KERNEL_COUNT_PER_TASK kernels.
    //!             The 0th bit of the hints indicates to use media object or media walker. Currently, only media object is valid.
    //!             The next bits indicate whether the next kernel is in the same or different kernel group.
    //!             For example, if the 1st bit is set then the second kernel is in a different kernel group from the first kernel,
    //!             if it is not set it is in the same kernel group. The kernels are interleaved between different kernel groups
    //!             and run concurrently. Within a kernel group, the kernels are dispatched in order. The kernel groups are dispatched
    //!             to separate sub-slices. The assumption is made that the kernel groups are comparable in kernel execution time.
    //!             There can be no dependency between different kernels; all kernels in the task should be independent of one another.
    //!             Additionally, pKernel->AssociateThreadSpace(CmThreadSpace*& pTS) must be called for each kernel.
    //!             A CmEvent is generated to check the status or other data regarding the task execution.
    //!             To avoid generating event, user can set the event as CM_NO_EVENT and pass it to this function.
    //! \param      [in] task
    //!             pointer to task to submit
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \param      [in] hints
    //!             Hints about work load from host to driver.
    //! \retval     CM_SUCCESS if the task is successfully enqueued.
    //! \retval     CM_OUT_OF_HOST_MEMORY if out of host memory
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueWithHints(CmTask *task,
                                               CmEvent *&event,
                                               uint32_t hints = 0) = 0;

    //!
    //! \brief      Enqueue a vebox task to vebox engine.
    //! \details    This call submits a VEBOX task to VEBOX engine for execution.
    //!             Before this function is called, the user needs to call CmDevice::CreateVebox() to create a CmVebox object,
    //!             and call the APIs in CmVebox class to set up VEBOX state and surfaces.
    //! \param      [in] vebox
    //!             Pointer to a CmVebox object.
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \retval     CM_SUCCESS if the task is successfully enqueued.
    //! \retval     CM_OUT_OF_HOST_MEMORY if out of host memory
    //! \retval     CM_INVALID_ARG_VALUE if input vebox is not valid
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueVebox(CmVebox *vebox, CmEvent *&event) = 0;

    //!
    //! \brief      Enqueue a task for execution with per-task thread space in a fast path.
    //! \details    This function enqueues a task represented by the CmTask object.
    //!             The kernels in the CmTask object may be run concurrently.
    //!             Tasks get executed according to the order they get enqueued.
    //!             This is a non-blocking call. It returns immediately without waiting
    //!             for GPU to start or finish execution. A CmEvent is generated each time
    //!             a task is enqueued. The CmEvent can be used to check the status of task.
    //!             The generated event needs to be managed and released by user.
    //!             Since event is not useful in some cases, runtime provides the capability
    //!             to avoid generating event.
    //!             If thread space is valid, the dependency defined by thread space will be honored.
    //! \param      [in] task
    //!             pointer to task to submit
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \param      [in] threadSpace
    //!             pointer to thread space which can define the thread dependency within the task.
    //!             This is a per task thread space. If this task has multiple kernels, each kernel
    //!             will have the thread space of same dimension, same dependency etc. If it is nullptr,
    //!             there is no thread dependency and the maximum thread space width will be assumed
    //!             to calculate the coordinates for each thread. For each kernel, the per kernel thread space
    //!             that is defined by calling CmKernel::AssociateThreadSpace() overwrites the per task thread space.
    //! \retval     CM_SUCCESS if the task is successfully enqueued.
    //! \retval     CM_OUT_OF_HOST_MEMORY if out of host memory
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueFast(CmTask *task,
                                          CmEvent *&event,
                                          const CmThreadSpace *threadSpace = nullptr) = 0;

    //!
    //! \brief      Destroy the CmEvent generated by EnqueueFast.
    //! \details    Destroy the event object previously generated by EnqueueFast.
    //!             The CmEvent object can be destroyed even before the corresponding task is flushed or finished.
    //!             If this happens, there is no way the app can get the task status.
    //! \param      [in] event
    //!             reference to pointer to event
    //! \retval     CM_SUCCESS if event destroyed successfully
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t DestroyEventFast(CmEvent *&event) = 0;

    //!
    //! \brief      Enqueue the task with thread group space in a fast path.
    //! \details
    //! \param      [in] task
    //!             pointer to task to submit
    //! \param      [in,out] event
    //!             reference to pointer of event generated. If it is set as CM_NO_EVENT,
    //!             its value returned by runtime is NULL.
    //! \param      [in] threadGroupSpace
    //!             pointer to thread group space which defines the dimensions of the task.
    //!             threadGroupSpace can not be NULL.
    //!             NOTE(review): the declaration defaults this argument to nullptr even though
    //!             it is documented as required - confirm the intended behavior.
    //! \retval     CM_SUCCESS if the task is successfully enqueued.
    //! \retval     CM_INVALID_ARG_VALUE if input task is not valid
    //! \retval     CM_EXCEED_MAX_KERNEL_PER_ENQUEUE if the task's kernel number exceeds limitation.
    //! \retval     CM_INVALID_THREAD_GROUP_SPACE if the thread group space specification is invalid.
    //! \retval     CM_THREAD_ARG_NOT_ALLOWED if user has per thread arguments
    //!
    CM_RT_API virtual int32_t EnqueueWithGroupFast(CmTask *task,
                                                   CmEvent *&event,
                                                   const CmThreadGroupSpace *threadGroupSpace = nullptr) = 0;

    //!
    //! \brief      Enqueue the kernel to copy memory from video memory buffer/1D surface to system memory.
    //! \details    This function enqueues a task that contains a pre-defined kernel to copy from
    //!             video memory buffer/1D surface to system memory. This is a non-blocking call.
    //!             The buffer read copy task needs to wait on a CM wait_event to check that the dependent
    //!             condition is ready before the actual copy starts.
    //!             Also a CM notification event is generated each time a task is enqueued.
    //!             The CmEvent can be used to check the status or other data regarding the task execution.
    //! \param      [in] buffer
    //!             CM Buffer as 1D surface is copy source
    //! \param      [in] offset
    //!             data copy starting address offset within CM buffer
    //! \param      [in] sysMem
    //!             system memory as copy destination, better to be 16-Byte aligned.
    //!             NOTE(review): documented as the copy destination yet declared as
    //!             const unsigned char* - confirm whether the const qualifier is intended.
    //! \param      [in] sysMemSize
    //!             data byte count to copy into system memory
    //! \param      [in] wait_event
    //!             a wait conditional event before read copy starts
    //! \param      [in,out] event
    //!             reference to pointer of CM event generated to notify buffer read copy task status change
    //! \param      [in] option
    //!             If it is non-zero, CPU worker thread will be used for buffer read copy
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueReadBuffer(CmBuffer* buffer,
                                                size_t offset,
                                                const unsigned char* sysMem,
                                                uint64_t sysMemSize,
                                                CmEvent* wait_event,
                                                CmEvent*& event,
                                                unsigned option = 0) = 0;

    //!
    //! \brief      Enqueue the kernel to copy memory from system memory to video memory buffer/1D surface.
    //! \details    This function enqueues a task, which contains a pre-defined kernel to copy from system
    //!             memory to 1D surface. This is a non-blocking call.
    //!             The buffer write copy task needs to wait on a CM wait_event to check that the condition
    //!             is ready before the actual copy starts.
    //!             Also a CM notification event is generated each time a task is enqueued.
    //!             The CmEvent can be used to check the status or other data regarding the task execution.
    //! \param      [in] buffer
    //!             CM Buffer as 1D surface is copy destination
    //! \param      [in] offset
    //!             data copy starting address offset within CM buffer
    //! \param      [in] sysMem
    //!             system memory as copy source, better to be 16-Byte aligned
    //! \param      [in] sysMemSize
    //!             data byte count to copy from system memory
    //! \param      [in] wait_event
    //!             a wait conditional event before write copy starts
    //! \param      [in,out] event
    //!             reference to pointer of CM event generated to notify buffer write copy task status change
    //! \param      [in] option
    //!             If it is non-zero, CPU worker thread will be used for buffer copy
    //! \retval     CM_SUCCESS if the task is successfully enqueued
    //! \retval     CM_GPUCOPY_OUT_OF_RESOURCE if runtime runs out of resources
    //! \retval     CM_FAILURE otherwise
    //!
    CM_RT_API virtual int32_t EnqueueWriteBuffer(CmBuffer* buffer,
                                                 size_t offset,
                                                 const unsigned char* sysMem,
                                                 uint64_t sysMemSize,
                                                 CmEvent* wait_event,
                                                 CmEvent*& event,
                                                 unsigned option = 0) = 0;


    //!
    //! \brief      [Only In Emu Mode] set the resident group number and parallel thread number
    //! \details
    //! \param      [in] residentGroupNum
    //!             number of resident groups running on device
    //! \param      [in] parallelThreadNum
    //!             number of threads run in parallel
    //! \retval     CM_SUCCESS if the parameter is successfully set.
    //! \retval     CM_NOT_IMPLEMENTED if in sim or emu mode
    //!             NOTE(review): the brief says this call is emulation-only, while this return
    //!             value is described as applying in sim or emu mode - confirm which is correct.
    //!
    CM_RT_API virtual int32_t SetResidentGroupAndParallelThreadNum(uint32_t residentGroupNum, uint32_t parallelThreadNum) = 0;

protected:
    //!
    //! \brief      Protected destructor: code outside the class hierarchy cannot
    //!             delete a queue through a CmQueue pointer.
    //!
    virtual ~CmQueue() = default;
};

#endif  // #ifndef CMRTLIB_AGNOSTIC_SHARE_CM_QUEUE_BASE_H_