1 /* 2 * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. 3 * 4 * Please refer to the NVIDIA end user license agreement (EULA) associated 5 * with this source code for terms and conditions that govern your use of 6 * this software. Any use, reproduction, disclosure, or distribution of 7 * this software and related documentation outside the terms of the EULA 8 * is strictly prohibited. 9 * 10 */ 11 12 #ifndef __cuda_tools_h__ 13 #define __cuda_tools_h__ 14 15 #include <stdlib.h> 16 17 #if defined(WIN32) || defined(GPAC_CONFIG_LINUX) || defined(GPAC_CONFIG_DARWIN) 18 19 #ifdef __cplusplus 20 extern "C" { 21 #endif 22 23 //needed for dec_nvdec_sdk.h which uses GL prototypes 24 #ifndef GPAC_DISABLE_3D 25 #include "../compositor/gl_inc.h" 26 #else 27 typedef u32 GLuint; 28 typedef u32 GLenum; 29 #endif 30 31 #ifndef __CUDA_API_VERSION 32 #define __CUDA_API_VERSION 4000 33 #endif 34 35 /** 36 * \defgroup CUDA_DRIVER CUDA Driver API 37 * 38 * This section describes the low-level CUDA driver application programming 39 * interface. 
40 * 41 * @{ 42 */ 43 44 /** 45 * \defgroup CUDA_TYPES Data types used by CUDA driver 46 * @{ 47 */ 48 49 /** 50 * CUDA API version number 51 */ 52 #define CUDA_VERSION 4000 /* 4.0 */ 53 54 /** 55 * CUDA device pointer 56 */ 57 #if __CUDA_API_VERSION >= 3020 58 59 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined(__aarch64__) 60 typedef unsigned long long CUdeviceptr; 61 #else 62 typedef unsigned int CUdeviceptr; 63 #endif 64 65 #endif /* __CUDA_API_VERSION >= 3020 */ 66 67 typedef int CUdevice; /**< CUDA device */ 68 typedef struct CUctx_st *CUcontext; /**< CUDA context */ 69 typedef struct CUmod_st *CUmodule; /**< CUDA module */ 70 typedef struct CUfunc_st *CUfunction; /**< CUDA function */ 71 typedef struct CUarray_st *CUarray; /**< CUDA array */ 72 typedef struct CUtexref_st *CUtexref; /**< CUDA texture reference */ 73 typedef struct CUsurfref_st *CUsurfref; /**< CUDA surface reference */ 74 typedef struct CUevent_st *CUevent; /**< CUDA event */ 75 typedef struct CUstream_st *CUstream; /**< CUDA stream */ 76 typedef struct CUgraphicsResource_st *CUgraphicsResource; /**< CUDA graphics interop resource */ 77 78 typedef struct CUuuid_st /**< CUDA definition of UUID */ 79 { 80 char bytes[16]; 81 } CUuuid; 82 83 /** 84 * Context creation flags 85 */ 86 typedef enum CUctx_flags_enum 87 { 88 CU_CTX_SCHED_AUTO = 0x00, /**< Automatic scheduling */ 89 CU_CTX_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */ 90 CU_CTX_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */ 91 CU_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */ 92 CU_CTX_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling \deprecated */ 93 CU_CTX_MAP_HOST = 0x08, /**< Support mapped pinned allocations */ 94 CU_CTX_LMEM_RESIZE_TO_MAX = 0x10, /**< Keep local memory allocation after launch */ 95 #if __CUDA_API_VERSION < 4000 96 CU_CTX_SCHED_MASK = 0x03, 97 CU_CTX_FLAGS_MASK = 0x1f 98 #else 99 
CU_CTX_SCHED_MASK = 0x07, 100 CU_CTX_PRIMARY = 0x20, /**< Initialize and return the primary context */ 101 CU_CTX_FLAGS_MASK = 0x3f 102 #endif 103 } CUctx_flags; 104 105 /** 106 * Event creation flags 107 */ 108 typedef enum CUevent_flags_enum 109 { 110 CU_EVENT_DEFAULT = 0, /**< Default event flag */ 111 CU_EVENT_BLOCKING_SYNC = 1, /**< Event uses blocking synchronization */ 112 CU_EVENT_DISABLE_TIMING = 2 /**< Event will not record timing data */ 113 } CUevent_flags; 114 115 /** 116 * Array formats 117 */ 118 typedef enum CUarray_format_enum 119 { 120 CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */ 121 CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */ 122 CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */ 123 CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */ 124 CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */ 125 CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */ 126 CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */ 127 CU_AD_FORMAT_FLOAT = 0x20 /**< 32-bit floating point */ 128 } CUarray_format; 129 130 /** 131 * Texture reference addressing modes 132 */ 133 typedef enum CUaddress_mode_enum 134 { 135 CU_TR_ADDRESS_MODE_WRAP = 0, /**< Wrapping address mode */ 136 CU_TR_ADDRESS_MODE_CLAMP = 1, /**< Clamp to edge address mode */ 137 CU_TR_ADDRESS_MODE_MIRROR = 2, /**< Mirror address mode */ 138 CU_TR_ADDRESS_MODE_BORDER = 3 /**< Border address mode */ 139 } CUaddress_mode; 140 141 /** 142 * Texture reference filtering modes 143 */ 144 typedef enum CUfilter_mode_enum 145 { 146 CU_TR_FILTER_MODE_POINT = 0, /**< Point filter mode */ 147 CU_TR_FILTER_MODE_LINEAR = 1 /**< Linear filter mode */ 148 } CUfilter_mode; 149 150 /** 151 * Device properties 152 */ 153 typedef enum CUdevice_attribute_enum 154 { 155 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */ 156 CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block 
dimension X */ 157 CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */ 158 CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */ 159 CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */ 160 CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */ 161 CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */ 162 CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */ 163 CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */ 164 CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */ 165 CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */ 166 CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */ 167 CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per block */ 168 CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */ 169 CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, /**< Peak clock frequency in kilohertz */ 170 CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, /**< Alignment requirement for textures */ 171 CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, /**< Device can possibly copy memory and execute a kernel concurrently */ 172 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, /**< Number of multiprocessors on device */ 173 CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, /**< Specifies whether there is a run time limit on kernels */ 174 CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, /**< Device is integrated with host memory */ 175 CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, /**< Device can map host memory into CUDA address space */ 176 CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, /**< Compute mode (See ::CUcomputemode for details) */ 177 
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, /**< Maximum 1D texture width */ 178 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, /**< Maximum 2D texture width */ 179 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, /**< Maximum 2D texture height */ 180 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, /**< Maximum 3D texture width */ 181 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, /**< Maximum 3D texture height */ 182 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, /**< Maximum 3D texture depth */ 183 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, /**< Maximum texture array width */ 184 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, /**< Maximum texture array height */ 185 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Maximum slices in a texture array */ 186 CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, /**< Alignment requirement for surfaces */ 187 CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, /**< Device can possibly execute multiple kernels concurrently */ 188 CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, /**< Device has ECC support enabled */ 189 CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, /**< PCI bus ID of the device */ 190 CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, /**< PCI device ID of the device */ 191 CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35 /**< Device is using TCC driver model */ 192 #if __CUDA_API_VERSION >= 4000 193 , CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36, /**< Peak memory clock frequency in kilohertz */ 194 CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37, /**< Global memory bus width in bits */ 195 CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38, /**< Size of L2 cache in bytes */ 196 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39, /**< Maximum resident threads per multiprocessor */ 197 CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40, /**< Number of asynchronous engines */ 198 CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41, /**< Device uses shares a unified address space with the host */ 199 
CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, /**< Maximum 1D layered texture width */ 200 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43 /**< Maximum layers in a 1D layered texture */ 201 #endif 202 } CUdevice_attribute; 203 204 /** 205 * Legacy device properties 206 */ 207 typedef struct CUdevprop_st 208 { 209 int maxThreadsPerBlock; /**< Maximum number of threads per block */ 210 int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */ 211 int maxGridSize[3]; /**< Maximum size of each dimension of a grid */ 212 int sharedMemPerBlock; /**< Shared memory available per block in bytes */ 213 int totalConstantMemory; /**< Constant memory available on device in bytes */ 214 int SIMDWidth; /**< Warp size in threads */ 215 int memPitch; /**< Maximum pitch in bytes allowed by memory copies */ 216 int regsPerBlock; /**< 32-bit registers available per block */ 217 int clockRate; /**< Clock frequency in kilohertz */ 218 int textureAlign; /**< Alignment requirement for textures */ 219 } CUdevprop; 220 221 /** 222 * Function properties 223 */ 224 typedef enum CUfunction_attribute_enum 225 { 226 /** 227 * The maximum number of threads per block, beyond which a launch of the 228 * function would fail. This number depends on both the function and the 229 * device on which the function is currently loaded. 230 */ 231 CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, 232 233 /** 234 * The size in bytes of statically-allocated shared memory required by 235 * this function. This does not include dynamically-allocated shared 236 * memory requested by the user at runtime. 237 */ 238 CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, 239 240 /** 241 * The size in bytes of user-allocated constant memory required by this 242 * function. 243 */ 244 CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, 245 246 /** 247 * The size in bytes of local memory used by each thread of this function. 
248 */ 249 CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, 250 251 /** 252 * The number of registers used by each thread of this function. 253 */ 254 CU_FUNC_ATTRIBUTE_NUM_REGS = 4, 255 256 /** 257 * The PTX virtual architecture version for which the function was 258 * compiled. This value is the major PTX version * 10 + the minor PTX 259 * version, so a PTX version 1.3 function would return the value 13. 260 * Note that this may return the undefined value of 0 for cubins 261 * compiled prior to CUDA 3.0. 262 */ 263 CU_FUNC_ATTRIBUTE_PTX_VERSION = 5, 264 265 /** 266 * The binary architecture version for which the function was compiled. 267 * This value is the major binary version * 10 + the minor binary version, 268 * so a binary version 1.3 function would return the value 13. Note that 269 * this will return a value of 10 for legacy cubins that do not have a 270 * properly-encoded binary architecture version. 271 */ 272 CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6, 273 274 CU_FUNC_ATTRIBUTE_MAX 275 } CUfunction_attribute; 276 277 /** 278 * Function cache configurations 279 */ 280 typedef enum CUfunc_cache_enum 281 { 282 CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */ 283 CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */ 284 CU_FUNC_CACHE_PREFER_L1 = 0x02 /**< prefer larger L1 cache and smaller shared memory */ 285 } CUfunc_cache; 286 287 /** 288 * Memory types 289 */ 290 typedef enum CUmemorytype_enum 291 { 292 CU_MEMORYTYPE_HOST = 0x01, /**< Host memory */ 293 CU_MEMORYTYPE_DEVICE = 0x02, /**< Device memory */ 294 CU_MEMORYTYPE_ARRAY = 0x03 /**< Array memory */ 295 #if __CUDA_API_VERSION >= 4000 296 , CU_MEMORYTYPE_UNIFIED = 0x04 /**< Unified device or host memory */ 297 #endif 298 } CUmemorytype; 299 300 /** 301 * Compute Modes 302 */ 303 typedef enum CUcomputemode_enum 304 { 305 CU_COMPUTEMODE_DEFAULT = 0, /**< Default compute mode (Multiple contexts allowed per device) */ 306 
CU_COMPUTEMODE_EXCLUSIVE = 1, /**< Compute-exclusive-thread mode (Only one context used by a single thread can be present on this device at a time) */ 307 CU_COMPUTEMODE_PROHIBITED = 2 /**< Compute-prohibited mode (No contexts can be created on this device at this time) */ 308 #if __CUDA_API_VERSION >= 4000 309 , CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 /**< Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time) */ 310 #endif 311 } CUcomputemode; 312 313 /** 314 * Online compiler options 315 */ 316 typedef enum CUjit_option_enum 317 { 318 /** 319 * Max number of registers that a thread may use.\n 320 * Option type: unsigned int 321 */ 322 CU_JIT_MAX_REGISTERS = 0, 323 324 /** 325 * IN: Specifies minimum number of threads per block to target compilation 326 * for\n 327 * OUT: Returns the number of threads the compiler actually targeted. 328 * This restricts the resource utilization fo the compiler (e.g. max 329 * registers) such that a block with the given number of threads should be 330 * able to launch based on register limitations. Note, this option does not 331 * currently take into account any other resource limitations, such as 332 * shared memory utilization.\n 333 * Option type: unsigned int 334 */ 335 CU_JIT_THREADS_PER_BLOCK, 336 337 /** 338 * Returns a float value in the option of the wall clock time, in 339 * milliseconds, spent creating the cubin\n 340 * Option type: float 341 */ 342 CU_JIT_WALL_TIME, 343 344 /** 345 * Pointer to a buffer in which to print any log messsages from PTXAS 346 * that are informational in nature (the buffer size is specified via 347 * option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) \n 348 * Option type: char* 349 */ 350 CU_JIT_INFO_LOG_BUFFER, 351 352 /** 353 * IN: Log buffer size in bytes. 
Log messages will be capped at this size 354 * (including null terminator)\n 355 * OUT: Amount of log buffer filled with messages\n 356 * Option type: unsigned int 357 */ 358 CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, 359 360 /** 361 * Pointer to a buffer in which to print any log messages from PTXAS that 362 * reflect errors (the buffer size is specified via option 363 * ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n 364 * Option type: char* 365 */ 366 CU_JIT_ERROR_LOG_BUFFER, 367 368 /** 369 * IN: Log buffer size in bytes. Log messages will be capped at this size 370 * (including null terminator)\n 371 * OUT: Amount of log buffer filled with messages\n 372 * Option type: unsigned int 373 */ 374 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, 375 376 /** 377 * Level of optimizations to apply to generated code (0 - 4), with 4 378 * being the default and highest level of optimizations.\n 379 * Option type: unsigned int 380 */ 381 CU_JIT_OPTIMIZATION_LEVEL, 382 383 /** 384 * No option value required. Determines the target based on the current 385 * attached context (default)\n 386 * Option type: No option value needed 387 */ 388 CU_JIT_TARGET_FROM_CUCONTEXT, 389 390 /** 391 * Target is chosen based on supplied ::CUjit_target_enum.\n 392 * Option type: unsigned int for enumerated type ::CUjit_target_enum 393 */ 394 CU_JIT_TARGET, 395 396 /** 397 * Specifies choice of fallback strategy if matching cubin is not found. 
398 * Choice is based on supplied ::CUjit_fallback_enum.\n 399 * Option type: unsigned int for enumerated type ::CUjit_fallback_enum 400 */ 401 CU_JIT_FALLBACK_STRATEGY 402 403 } CUjit_option; 404 405 /** 406 * Online compilation targets 407 */ 408 typedef enum CUjit_target_enum 409 { 410 CU_TARGET_COMPUTE_10 = 0, /**< Compute device class 1.0 */ 411 CU_TARGET_COMPUTE_11, /**< Compute device class 1.1 */ 412 CU_TARGET_COMPUTE_12, /**< Compute device class 1.2 */ 413 CU_TARGET_COMPUTE_13, /**< Compute device class 1.3 */ 414 CU_TARGET_COMPUTE_20, /**< Compute device class 2.0 */ 415 CU_TARGET_COMPUTE_21 /**< Compute device class 2.1 */ 416 } CUjit_target; 417 418 /** 419 * Cubin matching fallback strategies 420 */ 421 typedef enum CUjit_fallback_enum 422 { 423 CU_PREFER_PTX = 0, /**< Prefer to compile ptx */ 424 425 CU_PREFER_BINARY /**< Prefer to fall back to compatible binary code */ 426 427 } CUjit_fallback; 428 429 /** 430 * Flags to register a graphics resource 431 */ 432 typedef enum CUgraphicsRegisterFlags_enum 433 { 434 CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00, 435 CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01, 436 CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02, 437 CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04 438 } CUgraphicsRegisterFlags; 439 440 /** 441 * Flags for mapping and unmapping interop resources 442 */ 443 typedef enum CUgraphicsMapResourceFlags_enum 444 { 445 CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00, 446 CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, 447 CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 448 } CUgraphicsMapResourceFlags; 449 450 /** 451 * Array indices for cube faces 452 */ 453 typedef enum CUarray_cubemap_face_enum 454 { 455 CU_CUBEMAP_FACE_POSITIVE_X = 0x00, /**< Positive X face of cubemap */ 456 CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, /**< Negative X face of cubemap */ 457 CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, /**< Positive Y face of cubemap */ 458 CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, /**< Negative Y face of cubemap */ 459 
CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, /**< Positive Z face of cubemap */ 460 CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05 /**< Negative Z face of cubemap */ 461 } CUarray_cubemap_face; 462 463 /** 464 * Limits 465 */ 466 typedef enum CUlimit_enum 467 { 468 CU_LIMIT_STACK_SIZE = 0x00, /**< GPU thread stack size */ 469 CU_LIMIT_PRINTF_FIFO_SIZE = 0x01, /**< GPU printf FIFO size */ 470 CU_LIMIT_MALLOC_HEAP_SIZE = 0x02 /**< GPU malloc heap size */ 471 } CUlimit; 472 473 /** 474 * Error codes 475 */ 476 typedef enum cudaError_enum 477 { 478 /** 479 * The API call returned with no errors. In the case of query calls, this 480 * can also mean that the operation being queried is complete (see 481 * ::cuEventQuery() and ::cuStreamQuery()). 482 */ 483 CUDA_SUCCESS = 0, 484 485 /** 486 * This indicates that one or more of the parameters passed to the API call 487 * is not within an acceptable range of values. 488 */ 489 CUDA_ERROR_INVALID_VALUE = 1, 490 491 /** 492 * The API call failed because it was unable to allocate enough memory to 493 * perform the requested operation. 494 */ 495 CUDA_ERROR_OUT_OF_MEMORY = 2, 496 497 /** 498 * This indicates that the CUDA driver has not been initialized with 499 * ::cuInit() or that initialization has failed. 500 */ 501 CUDA_ERROR_NOT_INITIALIZED = 3, 502 503 /** 504 * This indicates that the CUDA driver is in the process of shutting down. 505 */ 506 CUDA_ERROR_DEINITIALIZED = 4, 507 508 /** 509 * This indicates profiling APIs are called while application is running 510 * in visual profiler mode. 511 */ 512 CUDA_ERROR_PROFILER_DISABLED = 5, 513 /** 514 * This indicates profiling has not been initialized for this context. 515 * Call cuProfilerInitialize() to resolve this. 516 */ 517 CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6, 518 /** 519 * This indicates profiler has already been started and probably 520 * cuProfilerStart() is incorrectly called. 
521 */ 522 CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, 523 /** 524 * This indicates profiler has already been stopped and probably 525 * cuProfilerStop() is incorrectly called. 526 */ 527 CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, 528 /** 529 * This indicates that no CUDA-capable devices were detected by the installed 530 * CUDA driver. 531 */ 532 CUDA_ERROR_NO_DEVICE = 100, 533 534 /** 535 * This indicates that the device ordinal supplied by the user does not 536 * correspond to a valid CUDA device. 537 */ 538 CUDA_ERROR_INVALID_DEVICE = 101, 539 540 541 /** 542 * This indicates that the device kernel image is invalid. This can also 543 * indicate an invalid CUDA module. 544 */ 545 CUDA_ERROR_INVALID_IMAGE = 200, 546 547 /** 548 * This most frequently indicates that there is no context bound to the 549 * current thread. This can also be returned if the context passed to an 550 * API call is not a valid handle (such as a context that has had 551 * ::cuCtxDestroy() invoked on it). This can also be returned if a user 552 * mixes different API versions (i.e. 3010 context with 3020 API calls). 553 * See ::cuCtxGetApiVersion() for more details. 554 */ 555 CUDA_ERROR_INVALID_CONTEXT = 201, 556 557 /** 558 * This indicated that the context being supplied as a parameter to the 559 * API call was already the active context. 560 * \deprecated 561 * This error return is deprecated as of CUDA 3.2. It is no longer an 562 * error to attempt to push the active context via ::cuCtxPushCurrent(). 563 */ 564 CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, 565 566 /** 567 * This indicates that a map or register operation has failed. 568 */ 569 CUDA_ERROR_MAP_FAILED = 205, 570 571 /** 572 * This indicates that an unmap or unregister operation has failed. 573 */ 574 CUDA_ERROR_UNMAP_FAILED = 206, 575 576 /** 577 * This indicates that the specified array is currently mapped and thus 578 * cannot be destroyed. 
579 */ 580 CUDA_ERROR_ARRAY_IS_MAPPED = 207, 581 582 /** 583 * This indicates that the resource is already mapped. 584 */ 585 CUDA_ERROR_ALREADY_MAPPED = 208, 586 587 /** 588 * This indicates that there is no kernel image available that is suitable 589 * for the device. This can occur when a user specifies code generation 590 * options for a particular CUDA source file that do not include the 591 * corresponding device configuration. 592 */ 593 CUDA_ERROR_NO_BINARY_FOR_GPU = 209, 594 595 /** 596 * This indicates that a resource has already been acquired. 597 */ 598 CUDA_ERROR_ALREADY_ACQUIRED = 210, 599 600 /** 601 * This indicates that a resource is not mapped. 602 */ 603 CUDA_ERROR_NOT_MAPPED = 211, 604 605 /** 606 * This indicates that a mapped resource is not available for access as an 607 * array. 608 */ 609 CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, 610 611 /** 612 * This indicates that a mapped resource is not available for access as a 613 * pointer. 614 */ 615 CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, 616 617 /** 618 * This indicates that an uncorrectable ECC error was detected during 619 * execution. 620 */ 621 CUDA_ERROR_ECC_UNCORRECTABLE = 214, 622 623 /** 624 * This indicates that the ::CUlimit passed to the API call is not 625 * supported by the active device. 626 */ 627 CUDA_ERROR_UNSUPPORTED_LIMIT = 215, 628 629 /** 630 * This indicates that the ::CUcontext passed to the API call can 631 * only be bound to a single CPU thread at a time but is already 632 * bound to a CPU thread. 633 */ 634 CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, 635 636 /** 637 * This indicates that the device kernel source is invalid. 638 */ 639 CUDA_ERROR_INVALID_SOURCE = 300, 640 641 /** 642 * This indicates that the file specified was not found. 643 */ 644 CUDA_ERROR_FILE_NOT_FOUND = 301, 645 646 /** 647 * This indicates that a link to a shared object failed to resolve. 
648 */ 649 CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, 650 651 /** 652 * This indicates that initialization of a shared object failed. 653 */ 654 CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, 655 656 /** 657 * This indicates that an OS call failed. 658 */ 659 CUDA_ERROR_OPERATING_SYSTEM = 304, 660 661 662 /** 663 * This indicates that a resource handle passed to the API call was not 664 * valid. Resource handles are opaque types like ::CUstream and ::CUevent. 665 */ 666 CUDA_ERROR_INVALID_HANDLE = 400, 667 668 669 /** 670 * This indicates that a named symbol was not found. Examples of symbols 671 * are global/constant variable names, texture names, and surface names. 672 */ 673 CUDA_ERROR_NOT_FOUND = 500, 674 675 676 /** 677 * This indicates that asynchronous operations issued previously have not 678 * completed yet. This result is not actually an error, but must be indicated 679 * differently than ::CUDA_SUCCESS (which indicates completion). Calls that 680 * may return this value include ::cuEventQuery() and ::cuStreamQuery(). 681 */ 682 CUDA_ERROR_NOT_READY = 600, 683 684 685 /** 686 * An exception occurred on the device while executing a kernel. Common 687 * causes include dereferencing an invalid device pointer and accessing 688 * out of bounds shared memory. The context cannot be used, so it must 689 * be destroyed (and a new one should be created). All existing device 690 * memory allocations from this context are invalid and must be 691 * reconstructed if the program is to continue using CUDA. 692 */ 693 CUDA_ERROR_LAUNCH_FAILED = 700, 694 695 /** 696 * This indicates that a launch did not occur because it did not have 697 * appropriate resources. This error usually indicates that the user has 698 * attempted to pass too many arguments to the device kernel, or the 699 * kernel launch specifies too many threads for the kernel's register 700 * count. Passing arguments of the wrong size (i.e. 
a 64-bit pointer 701 * when a 32-bit int is expected) is equivalent to passing too many 702 * arguments and can also result in this error. 703 */ 704 CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, 705 706 /** 707 * This indicates that the device kernel took too long to execute. This can 708 * only occur if timeouts are enabled - see the device attribute 709 * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The 710 * context cannot be used (and must be destroyed similar to 711 * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from 712 * this context are invalid and must be reconstructed if the program is to 713 * continue using CUDA. 714 */ 715 CUDA_ERROR_LAUNCH_TIMEOUT = 702, 716 717 /** 718 * This error indicates a kernel launch that uses an incompatible texturing 719 * mode. 720 */ 721 CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, 722 723 /** 724 * This error indicates that a call to ::cuCtxEnablePeerAccess() is 725 * trying to re-enable peer access to a context which has already 726 * had peer access to it enabled. 727 */ 728 CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, 729 730 /** 731 * This error indicates that a call to ::cuMemPeerRegister is trying to 732 * register memory from a context which has not had peer access 733 * enabled yet via ::cuCtxEnablePeerAccess(), or that 734 * ::cuCtxDisablePeerAccess() is trying to disable peer access 735 * which has not been enabled yet. 736 */ 737 CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705, 738 739 /** 740 * This error indicates that a call to ::cuMemPeerRegister is trying to 741 * register already-registered memory. 742 */ 743 CUDA_ERROR_PEER_MEMORY_ALREADY_REGISTERED = 706, 744 745 /** 746 * This error indicates that a call to ::cuMemPeerUnregister is trying to 747 * unregister memory that has not been registered. 
748 */ 749 CUDA_ERROR_PEER_MEMORY_NOT_REGISTERED = 707, 750 751 /** 752 * This error indicates that ::cuCtxCreate was called with the flag 753 * ::CU_CTX_PRIMARY on a device which already has initialized its 754 * primary context. 755 */ 756 CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, 757 758 /** 759 * This error indicates that the context current to the calling thread 760 * has been destroyed using ::cuCtxDestroy, or is a primary context which 761 * has not yet been initialized. 762 */ 763 CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, 764 765 /** 766 * This indicates that an unknown internal error has occurred. 767 */ 768 CUDA_ERROR_UNKNOWN = 999 769 } CUresult; 770 771 #if __CUDA_API_VERSION >= 4000 772 /** 773 * If set, host memory is portable between CUDA contexts. 774 * Flag for ::cuMemHostAlloc() 775 */ 776 #define CU_MEMHOSTALLOC_PORTABLE 0x01 777 778 /** 779 * If set, host memory is mapped into CUDA address space and 780 * ::cuMemHostGetDevicePointer() may be called on the host pointer. 781 * Flag for ::cuMemHostAlloc() 782 */ 783 #define CU_MEMHOSTALLOC_DEVICEMAP 0x02 784 785 /** 786 * If set, host memory is allocated as write-combined - fast to write, 787 * faster to DMA, slow to read except via SSE4 streaming load instruction 788 * (MOVNTDQA). 789 * Flag for ::cuMemHostAlloc() 790 */ 791 #define CU_MEMHOSTALLOC_WRITECOMBINED 0x04 792 793 /** 794 * If set, host memory is portable between CUDA contexts. 795 * Flag for ::cuMemHostRegister() 796 */ 797 #define CU_MEMHOSTREGISTER_PORTABLE 0x01 798 799 /** 800 * If set, host memory is mapped into CUDA address space and 801 * ::cuMemHostGetDevicePointer() may be called on the host pointer. 802 * Flag for ::cuMemHostRegister() 803 */ 804 #define CU_MEMHOSTREGISTER_DEVICEMAP 0x02 805 806 /** 807 * If set, peer memory is mapped into CUDA address space and 808 * ::cuMemPeerGetDevicePointer() may be called on the host pointer. 
809 * Flag for ::cuMemPeerRegister() 810 */ 811 #define CU_MEMPEERREGISTER_DEVICEMAP 0x02 812 #endif 813 814 #if __CUDA_API_VERSION >= 3020 815 /** 816 * 2D memory copy parameters 817 */ 818 typedef struct CUDA_MEMCPY2D_st 819 { 820 size_t srcXInBytes; /**< Source X in bytes */ 821 size_t srcY; /**< Source Y */ 822 823 CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ 824 const void *srcHost; /**< Source host pointer */ 825 CUdeviceptr srcDevice; /**< Source device pointer */ 826 CUarray srcArray; /**< Source array reference */ 827 size_t srcPitch; /**< Source pitch (ignored when src is array) */ 828 829 size_t dstXInBytes; /**< Destination X in bytes */ 830 size_t dstY; /**< Destination Y */ 831 832 CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ 833 void *dstHost; /**< Destination host pointer */ 834 CUdeviceptr dstDevice; /**< Destination device pointer */ 835 CUarray dstArray; /**< Destination array reference */ 836 size_t dstPitch; /**< Destination pitch (ignored when dst is array) */ 837 838 size_t WidthInBytes; /**< Width of 2D memory copy in bytes */ 839 size_t Height; /**< Height of 2D memory copy */ 840 } CUDA_MEMCPY2D; 841 842 /** 843 * 3D memory copy parameters 844 */ 845 typedef struct CUDA_MEMCPY3D_st 846 { 847 size_t srcXInBytes; /**< Source X in bytes */ 848 size_t srcY; /**< Source Y */ 849 size_t srcZ; /**< Source Z */ 850 size_t srcLOD; /**< Source LOD */ 851 CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ 852 const void *srcHost; /**< Source host pointer */ 853 CUdeviceptr srcDevice; /**< Source device pointer */ 854 CUarray srcArray; /**< Source array reference */ 855 void *reserved0; /**< Must be NULL */ 856 size_t srcPitch; /**< Source pitch (ignored when src is array) */ 857 size_t srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */ 858 859 size_t dstXInBytes; /**< Destination X in bytes */ 860 size_t dstY; /**< 
Destination Y */ 861 size_t dstZ; /**< Destination Z */ 862 size_t dstLOD; /**< Destination LOD */ 863 CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ 864 void *dstHost; /**< Destination host pointer */ 865 CUdeviceptr dstDevice; /**< Destination device pointer */ 866 CUarray dstArray; /**< Destination array reference */ 867 void *reserved1; /**< Must be NULL */ 868 size_t dstPitch; /**< Destination pitch (ignored when dst is array) */ 869 size_t dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */ 870 871 size_t WidthInBytes; /**< Width of 3D memory copy in bytes */ 872 size_t Height; /**< Height of 3D memory copy */ 873 size_t Depth; /**< Depth of 3D memory copy */ 874 } CUDA_MEMCPY3D; 875 876 /** 877 * 3D memory cross-context copy parameters 878 */ 879 typedef struct CUDA_MEMCPY3D_PEER_st 880 { 881 size_t srcXInBytes; /**< Source X in bytes */ 882 size_t srcY; /**< Source Y */ 883 size_t srcZ; /**< Source Z */ 884 size_t srcLOD; /**< Source LOD */ 885 CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ 886 const void *srcHost; /**< Source host pointer */ 887 CUdeviceptr srcDevice; /**< Source device pointer */ 888 CUarray srcArray; /**< Source array reference */ 889 CUcontext srcContext; /**< Source context (ignored with srcMemoryType is ::CU_MEMORYTYPE_ARRAY) */ 890 size_t srcPitch; /**< Source pitch (ignored when src is array) */ 891 size_t srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */ 892 893 size_t dstXInBytes; /**< Destination X in bytes */ 894 size_t dstY; /**< Destination Y */ 895 size_t dstZ; /**< Destination Z */ 896 size_t dstLOD; /**< Destination LOD */ 897 CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ 898 void *dstHost; /**< Destination host pointer */ 899 CUdeviceptr dstDevice; /**< Destination device pointer */ 900 CUarray dstArray; /**< Destination array reference */ 901 CUcontext 
dstContext; /**< Destination context (ignored with dstMemoryType is ::CU_MEMORYTYPE_ARRAY) */ 902 size_t dstPitch; /**< Destination pitch (ignored when dst is array) */ 903 size_t dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */ 904 905 size_t WidthInBytes; /**< Width of 3D memory copy in bytes */ 906 size_t Height; /**< Height of 3D memory copy */ 907 size_t Depth; /**< Depth of 3D memory copy */ 908 } CUDA_MEMCPY3D_PEER; 909 910 /** 911 * Array descriptor 912 */ 913 typedef struct CUDA_ARRAY_DESCRIPTOR_st 914 { 915 size_t Width; /**< Width of array */ 916 size_t Height; /**< Height of array */ 917 918 CUarray_format Format; /**< Array format */ 919 unsigned int NumChannels; /**< Channels per array element */ 920 } CUDA_ARRAY_DESCRIPTOR; 921 922 /** 923 * 3D array descriptor 924 */ 925 typedef struct CUDA_ARRAY3D_DESCRIPTOR_st 926 { 927 size_t Width; /**< Width of 3D array */ 928 size_t Height; /**< Height of 3D array */ 929 size_t Depth; /**< Depth of 3D array */ 930 931 CUarray_format Format; /**< Array format */ 932 unsigned int NumChannels; /**< Channels per array element */ 933 unsigned int Flags; /**< Flags */ 934 } CUDA_ARRAY3D_DESCRIPTOR; 935 936 #endif /* __CUDA_API_VERSION >= 3020 */ 937 938 /** 939 * If set, the CUDA array is a collection of layers, where each layer is either a 1D 940 * or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number 941 * of layers, not the depth of a 3D array. 942 */ 943 #define CUDA_ARRAY3D_LAYERED 0x01 944 945 /** 946 * Deprecated, use CUDA_ARRAY3D_LAYERED 947 */ 948 #define CUDA_ARRAY3D_2DARRAY 0x01 949 950 /** 951 * This flag must be set in order to bind a surface reference 952 * to the CUDA array 953 */ 954 #define CUDA_ARRAY3D_SURFACE_LDST 0x02 955 956 /** 957 * Override the texref format with a format inferred from the array. 
958 * Flag for ::cuTexRefSetArray() 959 */ 960 #define CU_TRSA_OVERRIDE_FORMAT 0x01 961 962 /** 963 * Read the texture as integers rather than promoting the values to floats 964 * in the range [0,1]. 965 * Flag for ::cuTexRefSetFlags() 966 */ 967 #define CU_TRSF_READ_AS_INTEGER 0x01 968 969 /** 970 * Use normalized texture coordinates in the range [0,1) instead of [0,dim). 971 * Flag for ::cuTexRefSetFlags() 972 */ 973 #define CU_TRSF_NORMALIZED_COORDINATES 0x02 974 975 /** 976 * Perform sRGB->linear conversion during texture read. 977 * Flag for ::cuTexRefSetFlags() 978 */ 979 #define CU_TRSF_SRGB 0x10 980 981 /** 982 * End of array terminator for the \p extra parameter to 983 * ::cuLaunchKernel 984 */ 985 #define CU_LAUNCH_PARAM_END ((void*)0x00) 986 987 /** 988 * Indicator that the next value in the \p extra parameter to 989 * ::cuLaunchKernel will be a pointer to a buffer containing all kernel 990 * parameters used for launching kernel \p f. This buffer needs to 991 * honor all alignment/padding requirements of the individual parameters. 992 * If ::CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the 993 * \p extra array, then ::CU_LAUNCH_PARAM_BUFFER_POINTER will have no 994 * effect. 995 */ 996 #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01) 997 998 /** 999 * Indicator that the next value in the \p extra parameter to 1000 * ::cuLaunchKernel will be a pointer to a size_t which contains the 1001 * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER. 1002 * It is required that ::CU_LAUNCH_PARAM_BUFFER_POINTER also be specified 1003 * in the \p extra array if the value associated with 1004 * ::CU_LAUNCH_PARAM_BUFFER_SIZE is not zero. 1005 */ 1006 #define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02) 1007 1008 /** 1009 * For texture references loaded into the module, use default texunit from 1010 * texture reference. 
1011 */ 1012 #define CU_PARAM_TR_DEFAULT -1 1013 1014 /** 1015 * CUDA API made obselete at API version 3020 1016 */ 1017 #if defined(__CUDA_API_VERSION_INTERNAL) 1018 #define CUdeviceptr CUdeviceptr_v1 1019 #define CUDA_MEMCPY2D_st CUDA_MEMCPY2D_v1_st 1020 #define CUDA_MEMCPY2D CUDA_MEMCPY2D_v1 1021 #define CUDA_MEMCPY3D_st CUDA_MEMCPY3D_v1_st 1022 #define CUDA_MEMCPY3D CUDA_MEMCPY3D_v1 1023 #define CUDA_ARRAY_DESCRIPTOR_st CUDA_ARRAY_DESCRIPTOR_v1_st 1024 #define CUDA_ARRAY_DESCRIPTOR CUDA_ARRAY_DESCRIPTOR_v1 1025 #define CUDA_ARRAY3D_DESCRIPTOR_st CUDA_ARRAY3D_DESCRIPTOR_v1_st 1026 #define CUDA_ARRAY3D_DESCRIPTOR CUDA_ARRAY3D_DESCRIPTOR_v1 1027 #endif /* CUDA_FORCE_LEGACY32_INTERNAL */ 1028 1029 #if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020 1030 typedef unsigned int CUdeviceptr; 1031 1032 typedef struct CUDA_MEMCPY2D_st 1033 { 1034 unsigned int srcXInBytes; /**< Source X in bytes */ 1035 unsigned int srcY; /**< Source Y */ 1036 CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ 1037 const void *srcHost; /**< Source host pointer */ 1038 CUdeviceptr srcDevice; /**< Source device pointer */ 1039 CUarray srcArray; /**< Source array reference */ 1040 unsigned int srcPitch; /**< Source pitch (ignored when src is array) */ 1041 1042 unsigned int dstXInBytes; /**< Destination X in bytes */ 1043 unsigned int dstY; /**< Destination Y */ 1044 CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ 1045 void *dstHost; /**< Destination host pointer */ 1046 CUdeviceptr dstDevice; /**< Destination device pointer */ 1047 CUarray dstArray; /**< Destination array reference */ 1048 unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */ 1049 1050 unsigned int WidthInBytes; /**< Width of 2D memory copy in bytes */ 1051 unsigned int Height; /**< Height of 2D memory copy */ 1052 } CUDA_MEMCPY2D; 1053 1054 typedef struct CUDA_MEMCPY3D_st 1055 { 1056 unsigned int srcXInBytes; /**< Source X 
in bytes */ 1057 unsigned int srcY; /**< Source Y */ 1058 unsigned int srcZ; /**< Source Z */ 1059 unsigned int srcLOD; /**< Source LOD */ 1060 CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */ 1061 const void *srcHost; /**< Source host pointer */ 1062 CUdeviceptr srcDevice; /**< Source device pointer */ 1063 CUarray srcArray; /**< Source array reference */ 1064 void *reserved0; /**< Must be NULL */ 1065 unsigned int srcPitch; /**< Source pitch (ignored when src is array) */ 1066 unsigned int srcHeight; /**< Source height (ignored when src is array; may be 0 if Depth==1) */ 1067 1068 unsigned int dstXInBytes; /**< Destination X in bytes */ 1069 unsigned int dstY; /**< Destination Y */ 1070 unsigned int dstZ; /**< Destination Z */ 1071 unsigned int dstLOD; /**< Destination LOD */ 1072 CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */ 1073 void *dstHost; /**< Destination host pointer */ 1074 CUdeviceptr dstDevice; /**< Destination device pointer */ 1075 CUarray dstArray; /**< Destination array reference */ 1076 void *reserved1; /**< Must be NULL */ 1077 unsigned int dstPitch; /**< Destination pitch (ignored when dst is array) */ 1078 unsigned int dstHeight; /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */ 1079 1080 unsigned int WidthInBytes; /**< Width of 3D memory copy in bytes */ 1081 unsigned int Height; /**< Height of 3D memory copy */ 1082 unsigned int Depth; /**< Depth of 3D memory copy */ 1083 } CUDA_MEMCPY3D; 1084 1085 typedef struct CUDA_ARRAY_DESCRIPTOR_st 1086 { 1087 unsigned int Width; /**< Width of array */ 1088 unsigned int Height; /**< Height of array */ 1089 1090 CUarray_format Format; /**< Array format */ 1091 unsigned int NumChannels; /**< Channels per array element */ 1092 } CUDA_ARRAY_DESCRIPTOR; 1093 1094 typedef struct CUDA_ARRAY3D_DESCRIPTOR_st 1095 { 1096 unsigned int Width; /**< Width of 3D array */ 1097 unsigned int Height; /**< Height of 3D array */ 1098 
unsigned int Depth; /**< Depth of 3D array */ 1099 1100 CUarray_format Format; /**< Array format */ 1101 unsigned int NumChannels; /**< Channels per array element */ 1102 unsigned int Flags; /**< Flags */ 1103 } CUDA_ARRAY3D_DESCRIPTOR; 1104 1105 #endif /* (__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020 */ 1106 1107 /* 1108 * If set, the CUDA array contains an array of 2D slices 1109 * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies 1110 * the number of slices, not the depth of a 3D array. 1111 */ 1112 #define CUDA_ARRAY3D_2DARRAY 0x01 1113 1114 /** 1115 * This flag must be set in order to bind a surface reference 1116 * to the CUDA array 1117 */ 1118 #define CUDA_ARRAY3D_SURFACE_LDST 0x02 1119 1120 /** 1121 * Override the texref format with a format inferred from the array. 1122 * Flag for ::cuTexRefSetArray() 1123 */ 1124 #define CU_TRSA_OVERRIDE_FORMAT 0x01 1125 1126 /** 1127 * Read the texture as integers rather than promoting the values to floats 1128 * in the range [0,1]. 1129 * Flag for ::cuTexRefSetFlags() 1130 */ 1131 #define CU_TRSF_READ_AS_INTEGER 0x01 1132 1133 /** 1134 * Use normalized texture coordinates in the range [0,1) instead of [0,dim). 1135 * Flag for ::cuTexRefSetFlags() 1136 */ 1137 #define CU_TRSF_NORMALIZED_COORDINATES 0x02 1138 1139 /** 1140 * Perform sRGB->linear conversion during texture read. 1141 * Flag for ::cuTexRefSetFlags() 1142 */ 1143 #define CU_TRSF_SRGB 0x10 1144 1145 /** 1146 * For texture references loaded into the module, use default texunit from 1147 * texture reference. 
1148 */ 1149 #define CU_PARAM_TR_DEFAULT -1 1150 1151 /** @} */ /* END CUDA_TYPES */ 1152 1153 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 1154 #define CUDAAPI __stdcall 1155 #else 1156 #define CUDAAPI 1157 #endif 1158 1159 /** 1160 * \defgroup CUDA_INITIALIZE Initialization 1161 * 1162 * This section describes the initialization functions of the low-level CUDA 1163 * driver application programming interface. 1164 * 1165 * @{ 1166 */ 1167 1168 /********************************* 1169 ** Initialization 1170 *********************************/ 1171 typedef CUresult CUDAAPI tcuInit(unsigned int Flags); 1172 1173 /********************************* 1174 ** Driver Version Query 1175 *********************************/ 1176 typedef CUresult CUDAAPI tcuDriverGetVersion(int *driverVersion); 1177 1178 /************************************ 1179 ** 1180 ** Device management 1181 ** 1182 ***********************************/ 1183 1184 typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal); 1185 typedef CUresult CUDAAPI tcuDeviceGetCount(int *count); 1186 typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev); 1187 typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev); 1188 #if __CUDA_API_VERSION >= 3020 1189 typedef CUresult CUDAAPI tcuDeviceTotalMem(size_t *bytes, CUdevice dev); 1190 #else 1191 typedef CUresult CUDAAPI tcuDeviceTotalMem(unsigned int *bytes, CUdevice dev); 1192 #endif 1193 1194 typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop *prop, CUdevice dev); 1195 typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev); 1196 1197 /************************************ 1198 ** 1199 ** Context management 1200 ** 1201 ***********************************/ 1202 typedef CUresult CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev); 1203 typedef CUresult CUDAAPI tcuCtxDestroy(CUcontext ctx); 1204 typedef CUresult 
CUDAAPI tcuCtxAttach(CUcontext *pctx, unsigned int flags); 1205 typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx); 1206 typedef CUresult CUDAAPI tcuCtxPushCurrent(CUcontext ctx); 1207 typedef CUresult CUDAAPI tcuCtxPopCurrent(CUcontext *pctx); 1208 1209 typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx); 1210 typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext *pctx); 1211 1212 typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device); 1213 typedef CUresult CUDAAPI tcuCtxSynchronize(void); 1214 1215 1216 /************************************ 1217 ** 1218 ** Module management 1219 ** 1220 ***********************************/ 1221 typedef CUresult CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname); 1222 typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image); 1223 typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues); 1224 typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin); 1225 typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod); 1226 typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name); 1227 1228 #if __CUDA_API_VERSION >= 3020 1229 typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name); 1230 #else 1231 typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, CUmodule hmod, const char *name); 1232 #endif 1233 1234 typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name); 1235 typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name); 1236 1237 /************************************ 1238 ** 1239 ** Memory management 1240 ** 1241 ***********************************/ 1242 #if __CUDA_API_VERSION >= 3020 1243 typedef CUresult CUDAAPI tcuMemGetInfo(size_t *free, size_t *total); 1244 typedef 
CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, size_t bytesize); 1245 typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr); 1246 typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, 1247 size_t *pPitch, 1248 size_t WidthInBytes, 1249 size_t Height, 1250 // size of biggest r/w to be performed by kernels on this memory 1251 // 4, 8 or 16 bytes 1252 unsigned int ElementSizeBytes 1253 ); 1254 #else 1255 typedef CUresult CUDAAPI tcuMemGetInfo(unsigned int *free, unsigned int *total); 1256 typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, unsigned int bytesize); 1257 typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, unsigned int *psize, CUdeviceptr dptr); 1258 typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, 1259 unsigned int *pPitch, 1260 unsigned int WidthInBytes, 1261 unsigned int Height, 1262 // size of biggest r/w to be performed by kernels on this memory 1263 // 4, 8 or 16 bytes 1264 unsigned int ElementSizeBytes 1265 ); 1266 #endif 1267 1268 typedef CUresult CUDAAPI tcuMemFree(CUdeviceptr dptr); 1269 1270 #if __CUDA_API_VERSION >= 3020 1271 typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, size_t bytesize); 1272 #else 1273 typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, unsigned int bytesize); 1274 #endif 1275 1276 typedef CUresult CUDAAPI tcuMemFreeHost(void *p); 1277 typedef CUresult CUDAAPI tcuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags); 1278 1279 typedef CUresult CUDAAPI tcuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags); 1280 typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int *pFlags, void *p); 1281 1282 typedef CUresult CUDAAPI tcuMemHostRegister(void *p, size_t bytesize, unsigned int Flags); 1283 typedef CUresult CUDAAPI tcuMemHostUnregister(void *p);; 1284 typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount); 1285 typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, 
CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount); 1286 1287 /************************************ 1288 ** 1289 ** Synchronous Memcpy 1290 ** 1291 ** Intra-device memcpy's done with these functions may execute in parallel with the CPU, 1292 ** but if host memory is involved, they wait until the copy is done before returning. 1293 ** 1294 ***********************************/ 1295 // 1D functions 1296 #if __CUDA_API_VERSION >= 3020 1297 // system <-> device memory 1298 typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount); 1299 typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount); 1300 1301 // device <-> device memory 1302 typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount); 1303 1304 // device <-> array memory 1305 typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount); 1306 typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount); 1307 1308 // system <-> array memory 1309 typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount); 1310 typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount); 1311 1312 // array <-> array memory 1313 typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount); 1314 #else 1315 // system <-> device memory 1316 typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount); 1317 typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount); 1318 1319 // device <-> device memory 1320 typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount); 
1321 1322 // device <-> array memory 1323 typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, unsigned int dstOffset, CUdeviceptr srcDevice, unsigned int ByteCount); 1324 typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount); 1325 1326 // system <-> array memory 1327 typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount); 1328 typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount); 1329 1330 // array <-> array memory 1331 typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, unsigned int dstOffset, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount); 1332 #endif 1333 1334 // 2D memcpy 1335 typedef CUresult CUDAAPI tcuMemcpy2D(const CUDA_MEMCPY2D *pCopy); 1336 typedef CUresult CUDAAPI tcuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy); 1337 1338 // 3D memcpy 1339 typedef CUresult CUDAAPI tcuMemcpy3D(const CUDA_MEMCPY3D *pCopy); 1340 1341 /************************************ 1342 ** 1343 ** Asynchronous Memcpy 1344 ** 1345 ** Any host memory involved must be DMA'able (e.g., allocated with cuMemAllocHost). 1346 ** memcpy's done with these functions execute in parallel with the CPU and, if 1347 ** the hardware is available, may execute in parallel with the GPU. 1348 ** Asynchronous memcpy must be accompanied by appropriate stream synchronization. 
1349 ** 1350 ***********************************/ 1351 1352 // 1D functions 1353 #if __CUDA_API_VERSION >= 3020 1354 // system <-> device memory 1355 typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice, 1356 const void *srcHost, size_t ByteCount, CUstream hStream); 1357 typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost, 1358 CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream); 1359 1360 // device <-> device memory 1361 typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice, 1362 CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream); 1363 1364 // system <-> array memory 1365 typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, 1366 const void *srcHost, size_t ByteCount, CUstream hStream); 1367 typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset, 1368 size_t ByteCount, CUstream hStream); 1369 #else 1370 // system <-> device memory 1371 typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice, 1372 const void *srcHost, unsigned int ByteCount, CUstream hStream); 1373 typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost, 1374 CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream); 1375 1376 // device <-> device memory 1377 typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice, 1378 CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream); 1379 1380 // system <-> array memory 1381 typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, unsigned int dstOffset, 1382 const void *srcHost, unsigned int ByteCount, CUstream hStream); 1383 typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, unsigned int srcOffset, 1384 unsigned int ByteCount, CUstream hStream); 1385 #endif 1386 1387 // 2D memcpy 1388 typedef CUresult CUDAAPI tcuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream); 1389 1390 // 3D memcpy 1391 typedef CUresult CUDAAPI tcuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream 
hStream); 1392 1393 /************************************ 1394 ** 1395 ** Memset 1396 ** 1397 ***********************************/ 1398 typedef CUresult CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, unsigned int N); 1399 typedef CUresult CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, unsigned int N); 1400 typedef CUresult CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, unsigned int N); 1401 1402 #if __CUDA_API_VERSION >= 3020 1403 typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, size_t Width, size_t Height); 1404 typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, size_t Width, size_t Height); 1405 typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, size_t Width, size_t Height); 1406 #else 1407 typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, unsigned int Width, unsigned int Height); 1408 typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, unsigned int Width, unsigned int Height); 1409 typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, unsigned int Width, unsigned int Height); 1410 #endif 1411 1412 /************************************ 1413 ** 1414 ** Function management 1415 ** 1416 ***********************************/ 1417 1418 1419 typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z); 1420 typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes); 1421 typedef CUresult CUDAAPI tcuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc); 1422 typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config); 1423 typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f, 1424 unsigned int gridDimX, unsigned int gridDimY, 
unsigned int gridDimZ, 1425 unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ, 1426 unsigned int sharedMemBytes, 1427 CUstream hStream, void **kernelParams, void **extra); 1428 1429 /************************************ 1430 ** 1431 ** Array management 1432 ** 1433 ***********************************/ 1434 1435 typedef CUresult CUDAAPI tcuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray); 1436 typedef CUresult CUDAAPI tcuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray); 1437 typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray); 1438 1439 typedef CUresult CUDAAPI tcuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray); 1440 typedef CUresult CUDAAPI tcuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray); 1441 1442 1443 /************************************ 1444 ** 1445 ** Texture reference management 1446 ** 1447 ***********************************/ 1448 typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref *pTexRef); 1449 typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef); 1450 1451 typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags); 1452 1453 #if __CUDA_API_VERSION >= 3020 1454 typedef CUresult CUDAAPI tcuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes); 1455 typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch); 1456 #else 1457 typedef CUresult CUDAAPI tcuTexRefSetAddress(unsigned int *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, unsigned int bytes); 1458 typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, unsigned int Pitch); 1459 #endif 1460 1461 typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents); 1462 typedef CUresult CUDAAPI 
tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am); 1463 typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm); 1464 typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags); 1465 1466 typedef CUresult CUDAAPI tcuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef); 1467 typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef); 1468 typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim); 1469 typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef); 1470 typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef); 1471 typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef); 1472 1473 /************************************ 1474 ** 1475 ** Surface reference management 1476 ** 1477 ***********************************/ 1478 typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags); 1479 typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef); 1480 1481 /************************************ 1482 ** 1483 ** Parameter management 1484 ** 1485 ***********************************/ 1486 1487 typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes); 1488 typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value); 1489 typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value); 1490 typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes); 1491 typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef); 1492 1493 1494 /************************************ 1495 ** 1496 ** Launch functions 1497 ** 1498 ***********************************/ 1499 1500 typedef CUresult CUDAAPI tcuLaunch(CUfunction f); 1501 typedef CUresult 
CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height); 1502 typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream); 1503 1504 /************************************ 1505 ** 1506 ** Events 1507 ** 1508 ***********************************/ 1509 typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int Flags); 1510 typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream); 1511 typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent); 1512 typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent); 1513 typedef CUresult CUDAAPI tcuEventDestroy(CUevent hEvent); 1514 typedef CUresult CUDAAPI tcuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd); 1515 1516 /************************************ 1517 ** 1518 ** Streams 1519 ** 1520 ***********************************/ 1521 typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int Flags); 1522 typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream); 1523 typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream); 1524 typedef CUresult CUDAAPI tcuStreamDestroy(CUstream hStream); 1525 1526 /************************************ 1527 ** 1528 ** Graphics interop 1529 ** 1530 ***********************************/ 1531 typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource); 1532 typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel); 1533 1534 #if __CUDA_API_VERSION >= 3020 1535 typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource); 1536 #else 1537 typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, unsigned int *pSize, CUgraphicsResource resource); 1538 #endif 1539 1540 typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int 
flags); 1541 typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream); 1542 typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream); 1543 1544 /************************************ 1545 ** 1546 ** Export tables 1547 ** 1548 ***********************************/ 1549 typedef CUresult CUDAAPI tcuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId); 1550 1551 /************************************ 1552 ** 1553 ** Limits 1554 ** 1555 ***********************************/ 1556 1557 typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value); 1558 typedef CUresult CUDAAPI tcuCtxGetLimit(size_t *pvalue, CUlimit limit); 1559 1560 1561 extern tcuDriverGetVersion *cuDriverGetVersion; 1562 extern tcuDeviceGet *cuDeviceGet; 1563 extern tcuDeviceGetCount *cuDeviceGetCount; 1564 extern tcuDeviceGetName *cuDeviceGetName; 1565 extern tcuDeviceComputeCapability *cuDeviceComputeCapability; 1566 extern tcuDeviceGetProperties *cuDeviceGetProperties; 1567 extern tcuDeviceGetAttribute *cuDeviceGetAttribute; 1568 extern tcuCtxDestroy *cuCtxDestroy; 1569 extern tcuCtxAttach *cuCtxAttach; 1570 extern tcuCtxDetach *cuCtxDetach; 1571 extern tcuCtxPushCurrent *cuCtxPushCurrent; 1572 extern tcuCtxPopCurrent *cuCtxPopCurrent; 1573 1574 extern tcuCtxSetCurrent *cuCtxSetCurrent; 1575 extern tcuCtxGetCurrent *cuCtxGetCurrent; 1576 1577 extern tcuCtxGetDevice *cuCtxGetDevice; 1578 extern tcuCtxSynchronize *cuCtxSynchronize; 1579 extern tcuModuleLoad *cuModuleLoad; 1580 extern tcuModuleLoadData *cuModuleLoadData; 1581 extern tcuModuleLoadDataEx *cuModuleLoadDataEx; 1582 extern tcuModuleLoadFatBinary *cuModuleLoadFatBinary; 1583 extern tcuModuleUnload *cuModuleUnload; 1584 extern tcuModuleGetFunction *cuModuleGetFunction; 1585 extern tcuModuleGetTexRef *cuModuleGetTexRef; 1586 extern tcuModuleGetSurfRef *cuModuleGetSurfRef; 1587 extern tcuMemFreeHost 
*cuMemFreeHost; 1588 extern tcuMemHostAlloc *cuMemHostAlloc; 1589 extern tcuMemHostGetFlags *cuMemHostGetFlags; 1590 1591 extern tcuMemHostRegister *cuMemHostRegister; 1592 extern tcuMemHostUnregister *cuMemHostUnregister; 1593 extern tcuMemcpy *cuMemcpy; 1594 extern tcuMemcpyPeer *cuMemcpyPeer; 1595 1596 extern tcuDeviceTotalMem *cuDeviceTotalMem; 1597 extern tcuCtxCreate *cuCtxCreate; 1598 extern tcuModuleGetGlobal *cuModuleGetGlobal; 1599 extern tcuMemGetInfo *cuMemGetInfo; 1600 extern tcuMemAlloc *cuMemAlloc; 1601 extern tcuMemAllocPitch *cuMemAllocPitch; 1602 extern tcuMemFree *cuMemFree; 1603 extern tcuMemGetAddressRange *cuMemGetAddressRange; 1604 extern tcuMemAllocHost *cuMemAllocHost; 1605 extern tcuMemHostGetDevicePointer *cuMemHostGetDevicePointer; 1606 extern tcuFuncSetBlockShape *cuFuncSetBlockShape; 1607 extern tcuFuncSetSharedSize *cuFuncSetSharedSize; 1608 extern tcuFuncGetAttribute *cuFuncGetAttribute; 1609 extern tcuFuncSetCacheConfig *cuFuncSetCacheConfig; 1610 extern tcuLaunchKernel *cuLaunchKernel; 1611 extern tcuArrayDestroy *cuArrayDestroy; 1612 extern tcuTexRefCreate *cuTexRefCreate; 1613 extern tcuTexRefDestroy *cuTexRefDestroy; 1614 extern tcuTexRefSetArray *cuTexRefSetArray; 1615 extern tcuTexRefSetFormat *cuTexRefSetFormat; 1616 extern tcuTexRefSetAddressMode *cuTexRefSetAddressMode; 1617 extern tcuTexRefSetFilterMode *cuTexRefSetFilterMode; 1618 extern tcuTexRefSetFlags *cuTexRefSetFlags; 1619 extern tcuTexRefGetArray *cuTexRefGetArray; 1620 extern tcuTexRefGetAddressMode *cuTexRefGetAddressMode; 1621 extern tcuTexRefGetFilterMode *cuTexRefGetFilterMode; 1622 extern tcuTexRefGetFormat *cuTexRefGetFormat; 1623 extern tcuTexRefGetFlags *cuTexRefGetFlags; 1624 extern tcuSurfRefSetArray *cuSurfRefSetArray; 1625 extern tcuSurfRefGetArray *cuSurfRefGetArray; 1626 extern tcuParamSetSize *cuParamSetSize; 1627 extern tcuParamSeti *cuParamSeti; 1628 extern tcuParamSetf *cuParamSetf; 1629 extern tcuParamSetv *cuParamSetv; 1630 extern 
tcuParamSetTexRef *cuParamSetTexRef; 1631 extern tcuLaunch *cuLaunch; 1632 extern tcuLaunchGrid *cuLaunchGrid; 1633 extern tcuLaunchGridAsync *cuLaunchGridAsync; 1634 extern tcuEventCreate *cuEventCreate; 1635 extern tcuEventRecord *cuEventRecord; 1636 extern tcuEventQuery *cuEventQuery; 1637 extern tcuEventSynchronize *cuEventSynchronize; 1638 extern tcuEventDestroy *cuEventDestroy; 1639 extern tcuEventElapsedTime *cuEventElapsedTime; 1640 extern tcuStreamCreate *cuStreamCreate; 1641 extern tcuStreamQuery *cuStreamQuery; 1642 extern tcuStreamSynchronize *cuStreamSynchronize; 1643 extern tcuStreamDestroy *cuStreamDestroy; 1644 extern tcuGraphicsUnregisterResource *cuGraphicsUnregisterResource; 1645 extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray; 1646 extern tcuGraphicsResourceSetMapFlags *cuGraphicsResourceSetMapFlags; 1647 extern tcuGraphicsMapResources *cuGraphicsMapResources; 1648 extern tcuGraphicsUnmapResources *cuGraphicsUnmapResources; 1649 extern tcuGetExportTable *cuGetExportTable; 1650 extern tcuCtxSetLimit *cuCtxSetLimit; 1651 extern tcuCtxGetLimit *cuCtxGetLimit; 1652 1653 // These functions could be using the CUDA 3.2 interface (_v2) 1654 extern tcuMemcpyHtoD *cuMemcpyHtoD; 1655 extern tcuMemcpyDtoH *cuMemcpyDtoH; 1656 extern tcuMemcpyDtoD *cuMemcpyDtoD; 1657 extern tcuMemcpyDtoA *cuMemcpyDtoA; 1658 extern tcuMemcpyAtoD *cuMemcpyAtoD; 1659 extern tcuMemcpyHtoA *cuMemcpyHtoA; 1660 extern tcuMemcpyAtoH *cuMemcpyAtoH; 1661 extern tcuMemcpyAtoA *cuMemcpyAtoA; 1662 extern tcuMemcpy2D *cuMemcpy2D; 1663 extern tcuMemcpy2DUnaligned *cuMemcpy2DUnaligned; 1664 extern tcuMemcpy3D *cuMemcpy3D; 1665 extern tcuMemcpyHtoDAsync *cuMemcpyHtoDAsync; 1666 extern tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync; 1667 extern tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync; 1668 extern tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync; 1669 extern tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync; 1670 extern tcuMemcpy2DAsync *cuMemcpy2DAsync; 1671 extern tcuMemcpy3DAsync 
*cuMemcpy3DAsync; 1672 extern tcuMemsetD8 *cuMemsetD8; 1673 extern tcuMemsetD16 *cuMemsetD16; 1674 extern tcuMemsetD32 *cuMemsetD32; 1675 extern tcuMemsetD2D8 *cuMemsetD2D8; 1676 extern tcuMemsetD2D16 *cuMemsetD2D16; 1677 extern tcuMemsetD2D32 *cuMemsetD2D32; 1678 extern tcuArrayCreate *cuArrayCreate; 1679 extern tcuArrayGetDescriptor *cuArrayGetDescriptor; 1680 extern tcuArray3DCreate *cuArray3DCreate; 1681 extern tcuArray3DGetDescriptor *cuArray3DGetDescriptor; 1682 extern tcuTexRefSetAddress *cuTexRefSetAddress; 1683 extern tcuTexRefSetAddress2D *cuTexRefSetAddress2D; 1684 extern tcuTexRefGetAddress *cuTexRefGetAddress; 1685 extern tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer; 1686 1687 /************************************/ 1688 CUresult CUDAAPI cuInit (unsigned int, int cudaVersion); 1689 void CUDAAPI cuUninit(); 1690 /************************************/ 1691 1692 1693 #ifndef __CUDA_API_VERSION 1694 #define __CUDA_API_VERSION 4000 1695 #endif 1696 1697 1698 #include <gpac/setup.h> 1699 #include "../../src/compositor/gl_inc.h" 1700 1701 /** 1702 * \file dynlink_cudaGL.h 1703 * \brief Header file for the OpenGL interoperability functions of the 1704 * low-level CUDA driver application programming interface. 1705 */ 1706 1707 /** 1708 * \defgroup CUDA_GL OpenGL Interoperability 1709 * \ingroup CUDA_DRIVER 1710 * 1711 * ___MANBRIEF___ OpenGL interoperability functions of the low-level CUDA 1712 * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___ 1713 * 1714 * This section describes the OpenGL interoperability functions of the 1715 * low-level CUDA driver application programming interface. Note that mapping 1716 * of OpenGL resources is performed with the graphics API agnostic, resource 1717 * mapping interface described in \ref CUDA_GRAPHICS "Graphics Interoperability". 
1718 * 1719 * @{ 1720 */ 1721 1722 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 1723 #if !defined(WGL_NV_gpu_affinity) 1724 typedef void* HGPUNV; 1725 #endif 1726 #endif /* _WIN32 */ 1727 1728 typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags); 1729 typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags); 1730 1731 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 1732 typedef CUresult CUDAAPI tcuWGLGetDevice(CUdevice *pDevice, HGPUNV hGpu); 1733 #endif /* _WIN32 */ 1734 1735 /** 1736 * CUDA devices corresponding to an OpenGL device 1737 */ 1738 typedef enum { 1739 CU_GL_DEVICE_LIST_ALL = 0x01, /**< The CUDA devices for all GPUs used by the current OpenGL context */ 1740 CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02, /**< The CUDA devices for the GPUs used by the current OpenGL context in its currently rendering frame */ 1741 CU_GL_DEVICE_LIST_NEXT_FRAME = 0x03, /**< The CUDA devices for the GPUs to be used by the current OpenGL context in the next frame */ 1742 } CUGLDeviceList; 1743 1744 #if __CUDA_API_VERSION >= 6050 1745 typedef CUresult CUDAAPI tcuGLGetDevices(unsigned int *pCudaDeviceCount, CUdevice *pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList); 1746 #endif /* __CUDA_API_VERSION >= 6050 */ 1747 1748 /** 1749 * \defgroup CUDA_GL_DEPRECATED OpenGL Interoperability [DEPRECATED] 1750 * 1751 * ___MANBRIEF___ deprecated OpenGL interoperability functions of the low-level 1752 * CUDA driver API (___CURRENT_FILE___) ___ENDMANBRIEF___ 1753 * 1754 * This section describes deprecated OpenGL interoperability functionality. 
1755 * 1756 * @{ 1757 */ 1758 1759 /** Flags to map or unmap a resource */ 1760 typedef enum CUGLmap_flags_enum { 1761 CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00, 1762 CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, 1763 CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02, 1764 } CUGLmap_flags; 1765 1766 //#if __CUDA_API_VERSION >= 3020 1767 typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device); 1768 //#endif /* __CUDA_API_VERSION >= 3020 */ 1769 1770 typedef CUresult CUDAAPI tcuGLInit(void); 1771 typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer); 1772 1773 #if __CUDA_API_VERSION >= 3020 1774 typedef CUresult CUDAAPI tcuGLMapBufferObject(CUdeviceptr *dptr, size_t *size, GLuint buffer); 1775 #endif /* __CUDA_API_VERSION >= 3020 */ 1776 1777 typedef CUresult CUDAAPI tcuGLUnmapBufferObject(GLuint buffer); 1778 typedef CUresult CUDAAPI tcuGLUnregisterBufferObject(GLuint buffer); 1779 typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned int Flags); 1780 1781 #if __CUDA_API_VERSION >= 3020 1782 typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync(CUdeviceptr *dptr, size_t *size, GLuint buffer, CUstream hStream); 1783 #endif /* __CUDA_API_VERSION >= 3020 */ 1784 1785 typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream); 1786 typedef CUresult CUDAAPI tcuGLGetDevices(unsigned int *pCudaDeviceCount, CUdevice *pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList); 1787 1788 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 1789 extern tcuWGLGetDevice *cuWGLGetDevice; 1790 #endif 1791 1792 extern tcuGLCtxCreate *cuGLCtxCreate; 1793 extern tcuGLCtxCreate *cuGLCtxCreate_v2; 1794 extern tcuGLMapBufferObject *cuGLMapBufferObject; 1795 extern tcuGLMapBufferObject *cuGLMapBufferObject_v2; 1796 extern tcuGLMapBufferObjectAsync *cuGLMapBufferObjectAsync; 1797 1798 #if __CUDA_API_VERSION >= 6050 1799 extern tcuGLGetDevices *cuGLGetDevices; 1800 #endif 
1801 1802 extern tcuGraphicsGLRegisterBuffer *cuGraphicsGLRegisterBuffer; 1803 extern tcuGraphicsGLRegisterImage *cuGraphicsGLRegisterImage; 1804 extern tcuGLSetBufferObjectMapFlags *cuGLSetBufferObjectMapFlags; 1805 extern tcuGLRegisterBufferObject *cuGLRegisterBufferObject; 1806 1807 extern tcuGLUnmapBufferObject *cuGLUnmapBufferObject; 1808 extern tcuGLUnmapBufferObjectAsync *cuGLUnmapBufferObjectAsync; 1809 1810 extern tcuGLUnregisterBufferObject *cuGLUnregisterBufferObject; 1811 extern tcuGLGetDevices *cuGLGetDevices; // CUDA 6.5 only 1812 1813 1814 #if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64) 1815 #include <Windows.h> 1816 typedef HMODULE CUDADRIVER; 1817 #else 1818 typedef void *CUDADRIVER; 1819 #endif 1820 1821 1822 1823 1824 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) 1825 #if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020)) 1826 #define __CUVID_DEVPTR64 1827 #endif 1828 #endif 1829 1830 1831 typedef void *CUvideodecoder; 1832 typedef struct _CUcontextlock_st *CUvideoctxlock; 1833 1834 /** 1835 * \addtogroup VIDEO_DECODER Video Decoder 1836 * @{ 1837 */ 1838 1839 /*! 
1840 * \enum cudaVideoCodec 1841 * Video Codec Enums 1842 */ 1843 typedef enum cudaVideoCodec_enum { 1844 cudaVideoCodec_MPEG1=0, /**< MPEG1 */ 1845 cudaVideoCodec_MPEG2, /**< MPEG2 */ 1846 cudaVideoCodec_MPEG4, /**< MPEG4 */ 1847 cudaVideoCodec_VC1, /**< VC1 */ 1848 cudaVideoCodec_H264, /**< H264 */ 1849 cudaVideoCodec_JPEG, /**< JPEG */ 1850 cudaVideoCodec_H264_SVC, /**< H264-SVC */ 1851 cudaVideoCodec_H264_MVC, /**< H264-MVC */ 1852 cudaVideoCodec_HEVC, /**< HEVC */ 1853 cudaVideoCodec_VP8, /**< VP8 */ 1854 cudaVideoCodec_VP9, /**< VP9 */ 1855 cudaVideoCodec_NumCodecs, /**< Max COdecs */ 1856 // Uncompressed YUV 1857 cudaVideoCodec_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), /**< Y,U,V (4:2:0) */ 1858 cudaVideoCodec_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,V,U (4:2:0) */ 1859 cudaVideoCodec_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), /**< Y,UV (4:2:0) */ 1860 cudaVideoCodec_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), /**< YUYV/YUY2 (4:2:2) */ 1861 cudaVideoCodec_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) /**< UYVY (4:2:2) */ 1862 } cudaVideoCodec; 1863 1864 /*! 1865 * \enum cudaVideoSurfaceFormat 1866 * Video Surface Formats Enums 1867 */ 1868 typedef enum cudaVideoSurfaceFormat_enum { 1869 cudaVideoSurfaceFormat_NV12=0, /**< NV12 (currently the only supported output format) */ 1870 cudaVideoSurfaceFormat_P016=1 1871 } cudaVideoSurfaceFormat; 1872 1873 /*! 1874 * \enum cudaVideoDeinterlaceMode 1875 * Deinterlacing Modes Enums 1876 */ 1877 typedef enum cudaVideoDeinterlaceMode_enum { 1878 cudaVideoDeinterlaceMode_Weave=0, /**< Weave both fields (no deinterlacing) */ 1879 cudaVideoDeinterlaceMode_Bob, /**< Drop one field */ 1880 cudaVideoDeinterlaceMode_Adaptive /**< Adaptive deinterlacing */ 1881 } cudaVideoDeinterlaceMode; 1882 1883 /*! 
1884 * \enum cudaVideoChromaFormat 1885 * Chroma Formats Enums 1886 */ 1887 typedef enum cudaVideoChromaFormat_enum { 1888 cudaVideoChromaFormat_Monochrome=0, /**< MonoChrome */ 1889 cudaVideoChromaFormat_420, /**< 4:2:0 */ 1890 cudaVideoChromaFormat_422, /**< 4:2:2 */ 1891 cudaVideoChromaFormat_444 /**< 4:4:4 */ 1892 } cudaVideoChromaFormat; 1893 1894 /*! 1895 * \enum cudaVideoCreateFlags 1896 * Decoder Flags Enums 1897 */ 1898 typedef enum cudaVideoCreateFlags_enum { 1899 cudaVideoCreate_Default = 0x00, /**< Default operation mode: use dedicated video engines */ 1900 cudaVideoCreate_PreferCUDA = 0x01, /**< Use a CUDA-based decoder if faster than dedicated engines (requires a valid vidLock object for multi-threading) */ 1901 cudaVideoCreate_PreferDXVA = 0x02, /**< Go through DXVA internally if possible (requires D3D9 interop) */ 1902 cudaVideoCreate_PreferCUVID = 0x04 /**< Use dedicated video engines directly */ 1903 } cudaVideoCreateFlags; 1904 1905 /*! 1906 * \struct CUVIDDECODECREATEINFO 1907 * Struct used in create decoder 1908 */ 1909 typedef struct _CUVIDDECODECREATEINFO 1910 { 1911 unsigned long ulWidth; /**< Coded Sequence Width */ 1912 unsigned long ulHeight; /**< Coded Sequence Height */ 1913 unsigned long ulNumDecodeSurfaces; /**< Maximum number of internal decode surfaces */ 1914 cudaVideoCodec CodecType; /**< cudaVideoCodec_XXX */ 1915 cudaVideoChromaFormat ChromaFormat; /**< cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) */ 1916 unsigned long ulCreationFlags; /**< Decoder creation flags (cudaVideoCreateFlags_XXX) */ 1917 unsigned long bitDepthMinus8; 1918 unsigned long Reserved1[4]; /**< Reserved for future use - set to zero */ 1919 /** 1920 * area of the frame that should be displayed 1921 */ 1922 struct { 1923 short left; 1924 short top; 1925 short right; 1926 short bottom; 1927 } display_area; 1928 1929 cudaVideoSurfaceFormat OutputFormat; /**< cudaVideoSurfaceFormat_XXX */ 1930 cudaVideoDeinterlaceMode DeinterlaceMode; /**< 
cudaVideoDeinterlaceMode_XXX */ 1931 unsigned long ulTargetWidth; /**< Post-processed Output Width (Should be aligned to 2) */ 1932 unsigned long ulTargetHeight; /**< Post-processed Output Height (Should be aligbed to 2) */ 1933 unsigned long ulNumOutputSurfaces; /**< Maximum number of output surfaces simultaneously mapped */ 1934 CUvideoctxlock vidLock; /**< If non-NULL, context lock used for synchronizing ownership of the cuda context */ 1935 /** 1936 * target rectangle in the output frame (for aspect ratio conversion) 1937 * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used 1938 */ 1939 struct { 1940 short left; 1941 short top; 1942 short right; 1943 short bottom; 1944 } target_rect; 1945 unsigned long Reserved2[5]; /**< Reserved for future use - set to zero */ 1946 } CUVIDDECODECREATEINFO; 1947 1948 /*! 1949 * \struct CUVIDH264DPBENTRY 1950 * H.264 DPB Entry 1951 */ 1952 typedef struct _CUVIDH264DPBENTRY 1953 { 1954 int PicIdx; /**< picture index of reference frame */ 1955 int FrameIdx; /**< frame_num(short-term) or LongTermFrameIdx(long-term) */ 1956 int is_long_term; /**< 0=short term reference, 1=long term reference */ 1957 int not_existing; /**< non-existing reference frame (corresponding PicIdx should be set to -1) */ 1958 int used_for_reference; /**< 0=unused, 1=top_field, 2=bottom_field, 3=both_fields */ 1959 int FieldOrderCnt[2]; /**< field order count of top and bottom fields */ 1960 } CUVIDH264DPBENTRY; 1961 1962 /*! 1963 * \struct CUVIDH264MVCEXT 1964 * H.264 MVC Picture Parameters Ext 1965 */ 1966 typedef struct _CUVIDH264MVCEXT 1967 { 1968 int num_views_minus1; 1969 int view_id; 1970 unsigned char inter_view_flag; 1971 unsigned char num_inter_view_refs_l0; 1972 unsigned char num_inter_view_refs_l1; 1973 unsigned char MVCReserved8Bits; 1974 int InterViewRefsL0[16]; 1975 int InterViewRefsL1[16]; 1976 } CUVIDH264MVCEXT; 1977 1978 /*! 
1979 * \struct CUVIDH264SVCEXT 1980 * H.264 SVC Picture Parameters Ext 1981 */ 1982 typedef struct _CUVIDH264SVCEXT 1983 { 1984 unsigned char profile_idc; 1985 unsigned char level_idc; 1986 unsigned char DQId; 1987 unsigned char DQIdMax; 1988 unsigned char disable_inter_layer_deblocking_filter_idc; 1989 unsigned char ref_layer_chroma_phase_y_plus1; 1990 signed char inter_layer_slice_alpha_c0_offset_div2; 1991 signed char inter_layer_slice_beta_offset_div2; 1992 1993 unsigned short DPBEntryValidFlag; 1994 unsigned char inter_layer_deblocking_filter_control_present_flag; 1995 unsigned char extended_spatial_scalability_idc; 1996 unsigned char adaptive_tcoeff_level_prediction_flag; 1997 unsigned char slice_header_restriction_flag; 1998 unsigned char chroma_phase_x_plus1_flag; 1999 unsigned char chroma_phase_y_plus1; 2000 2001 unsigned char tcoeff_level_prediction_flag; 2002 unsigned char constrained_intra_resampling_flag; 2003 unsigned char ref_layer_chroma_phase_x_plus1_flag; 2004 unsigned char store_ref_base_pic_flag; 2005 unsigned char Reserved8BitsA; 2006 unsigned char Reserved8BitsB; 2007 // For the 4 scaled_ref_layer_XX fields below, 2008 // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, add prefix "seq_" 2009 // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4, 2010 short scaled_ref_layer_left_offset; 2011 short scaled_ref_layer_top_offset; 2012 short scaled_ref_layer_right_offset; 2013 short scaled_ref_layer_bottom_offset; 2014 unsigned short Reserved16Bits; 2015 struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded. Linked list ends at the target layer. */ 2016 int bRefBaseLayer; /**< whether to store ref base pic */ 2017 } CUVIDH264SVCEXT; 2018 2019 /*! 
2020 * \struct CUVIDH264PICPARAMS 2021 * H.264 Picture Parameters 2022 */ 2023 typedef struct _CUVIDH264PICPARAMS 2024 { 2025 // SPS 2026 int log2_max_frame_num_minus4; 2027 int pic_order_cnt_type; 2028 int log2_max_pic_order_cnt_lsb_minus4; 2029 int delta_pic_order_always_zero_flag; 2030 int frame_mbs_only_flag; 2031 int direct_8x8_inference_flag; 2032 int num_ref_frames; // NOTE: shall meet level 4.1 restrictions 2033 unsigned char residual_colour_transform_flag; 2034 unsigned char bit_depth_luma_minus8; // Must be 0 (only 8-bit supported) 2035 unsigned char bit_depth_chroma_minus8; // Must be 0 (only 8-bit supported) 2036 unsigned char qpprime_y_zero_transform_bypass_flag; 2037 // PPS 2038 int entropy_coding_mode_flag; 2039 int pic_order_present_flag; 2040 int num_ref_idx_l0_active_minus1; 2041 int num_ref_idx_l1_active_minus1; 2042 int weighted_pred_flag; 2043 int weighted_bipred_idc; 2044 int pic_init_qp_minus26; 2045 int deblocking_filter_control_present_flag; 2046 int redundant_pic_cnt_present_flag; 2047 int transform_8x8_mode_flag; 2048 int MbaffFrameFlag; 2049 int constrained_intra_pred_flag; 2050 int chroma_qp_index_offset; 2051 int second_chroma_qp_index_offset; 2052 int ref_pic_flag; 2053 int frame_num; 2054 int CurrFieldOrderCnt[2]; 2055 // DPB 2056 CUVIDH264DPBENTRY dpb[16]; // List of reference frames within the DPB 2057 // Quantization Matrices (raster-order) 2058 unsigned char WeightScale4x4[6][16]; 2059 unsigned char WeightScale8x8[2][64]; 2060 // FMO/ASO 2061 unsigned char fmo_aso_enable; 2062 unsigned char num_slice_groups_minus1; 2063 unsigned char slice_group_map_type; 2064 signed char pic_init_qs_minus26; 2065 unsigned int slice_group_change_rate_minus1; 2066 union 2067 { 2068 unsigned long long slice_group_map_addr; 2069 const unsigned char *pMb2SliceGroupMap; 2070 } fmo; 2071 unsigned int Reserved[12]; 2072 // SVC/MVC 2073 union 2074 { 2075 CUVIDH264MVCEXT mvcext; 2076 CUVIDH264SVCEXT svcext; 2077 }; 2078 } CUVIDH264PICPARAMS; 2079 2080 
/*!
 * \struct CUVIDMPEG2PICPARAMS
 * MPEG-2 Picture Parameters
 */
typedef struct _CUVIDMPEG2PICPARAMS
{
    int ForwardRefIdx;              /**< Picture index of forward reference (P/B-frames) */
    int BackwardRefIdx;             /**< Picture index of backward reference (B-frames) */
    int picture_coding_type;
    int full_pel_forward_vector;
    int full_pel_backward_vector;
    int f_code[2][2];
    int intra_dc_precision;
    int frame_pred_frame_dct;
    int concealment_motion_vectors;
    int q_scale_type;
    int intra_vlc_format;
    int alternate_scan;
    int top_field_first;
    /* Quantization matrices (raster order) */
    unsigned char QuantMatrixIntra[64];
    unsigned char QuantMatrixInter[64];
} CUVIDMPEG2PICPARAMS;

/*
 * MPEG-4 Picture Parameters
 */

/* MPEG-4 has VOP types instead of Picture types */
#define I_VOP 0
#define P_VOP 1
#define B_VOP 2
#define S_VOP 3

/*!
 * \struct CUVIDMPEG4PICPARAMS
 * MPEG-4 Picture Parameters
 */
typedef struct _CUVIDMPEG4PICPARAMS
{
    int ForwardRefIdx;              /**< Picture index of forward reference (P/B-frames) */
    int BackwardRefIdx;             /**< Picture index of backward reference (B-frames) */
    /* VOL */
    int video_object_layer_width;
    int video_object_layer_height;
    int vop_time_increment_bitcount;
    int top_field_first;
    int resync_marker_disable;
    int quant_type;
    int quarter_sample;
    int short_video_header;
    int divx_flags;
    /* VOP */
    int vop_coding_type;
    int vop_coded;
    int vop_rounding_type;
    int alternate_vertical_scan_flag;
    int interlaced;
    int vop_fcode_forward;
    int vop_fcode_backward;
    int trd[2];
    int trb[2];
    /* Quantization matrices (raster order) */
    unsigned char QuantMatrixIntra[64];
    unsigned char QuantMatrixInter[64];
    int gmc_enabled;
} CUVIDMPEG4PICPARAMS;

/*!
 * \struct CUVIDVC1PICPARAMS
 * VC1 Picture Parameters
 */
typedef struct _CUVIDVC1PICPARAMS
{
    int ForwardRefIdx;      /**< Picture index of forward reference (P/B-frames) */
    int BackwardRefIdx;     /**< Picture index of backward reference (B-frames) */
    int FrameWidth;         /**< Actual frame width */
    int FrameHeight;        /**< Actual frame height */
    /* PICTURE */
    int intra_pic_flag;     /**< Set to 1 for I,BI frames */
    int ref_pic_flag;       /**< Set to 1 for I,P frames */
    int progressive_fcm;    /**< Progressive frame */
    /* SEQUENCE */
    int profile;
    int postprocflag;
    int pulldown;
    int interlace;
    int tfcntrflag;
    int finterpflag;
    int psf;
    int multires;
    int syncmarker;
    int rangered;
    int maxbframes;
    /* ENTRYPOINT */
    int panscan_flag;
    int refdist_flag;
    int extended_mv;
    int dquant;
    int vstransform;
    int loopfilter;
    int fastuvmc;
    int overlap;
    int quantizer;
    int extended_dmv;
    int range_mapy_flag;
    int range_mapy;
    int range_mapuv_flag;
    int range_mapuv;
    int rangeredfrm;        /**< range reduction state */
} CUVIDVC1PICPARAMS;

/*!
 * \struct CUVIDJPEGPICPARAMS
 * JPEG Picture Parameters
 */
typedef struct _CUVIDJPEGPICPARAMS
{
    int Reserved;
} CUVIDJPEGPICPARAMS;


/*!
 * \struct CUVIDHEVCPICPARAMS
 * HEVC Picture Parameters
 */
typedef struct _CUVIDHEVCPICPARAMS
{
    /* sps */
    int pic_width_in_luma_samples;
    int pic_height_in_luma_samples;
    unsigned char log2_min_luma_coding_block_size_minus3;
    unsigned char log2_diff_max_min_luma_coding_block_size;
    unsigned char log2_min_transform_block_size_minus2;
    unsigned char log2_diff_max_min_transform_block_size;
    unsigned char pcm_enabled_flag;
    unsigned char log2_min_pcm_luma_coding_block_size_minus3;
    unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
    unsigned char pcm_sample_bit_depth_luma_minus1;

    unsigned char pcm_sample_bit_depth_chroma_minus1;
    unsigned char pcm_loop_filter_disabled_flag;
    unsigned char strong_intra_smoothing_enabled_flag;
    unsigned char max_transform_hierarchy_depth_intra;
    unsigned char max_transform_hierarchy_depth_inter;
    unsigned char amp_enabled_flag;
    unsigned char separate_colour_plane_flag;
    unsigned char log2_max_pic_order_cnt_lsb_minus4;

    unsigned char num_short_term_ref_pic_sets;
    unsigned char long_term_ref_pics_present_flag;
    unsigned char num_long_term_ref_pics_sps;
    unsigned char sps_temporal_mvp_enabled_flag;
    unsigned char sample_adaptive_offset_enabled_flag;
    unsigned char scaling_list_enable_flag;
    unsigned char IrapPicFlag;
    unsigned char IdrPicFlag;

    unsigned char bit_depth_luma_minus8;
    unsigned char bit_depth_chroma_minus8;
    unsigned char reserved1[14];

    /* pps */
    unsigned char dependent_slice_segments_enabled_flag;
    unsigned char slice_segment_header_extension_present_flag;
    unsigned char sign_data_hiding_enabled_flag;
    unsigned char cu_qp_delta_enabled_flag;
    unsigned char diff_cu_qp_delta_depth;
    signed char   init_qp_minus26;
    signed char   pps_cb_qp_offset;
    signed char   pps_cr_qp_offset;

    unsigned char constrained_intra_pred_flag;
    unsigned char weighted_pred_flag;
    unsigned char weighted_bipred_flag;
    unsigned char transform_skip_enabled_flag;
    unsigned char transquant_bypass_enabled_flag;
    unsigned char entropy_coding_sync_enabled_flag;
    unsigned char log2_parallel_merge_level_minus2;
    unsigned char num_extra_slice_header_bits;

    unsigned char loop_filter_across_tiles_enabled_flag;
    unsigned char loop_filter_across_slices_enabled_flag;
    unsigned char output_flag_present_flag;
    unsigned char num_ref_idx_l0_default_active_minus1;
    unsigned char num_ref_idx_l1_default_active_minus1;
    unsigned char lists_modification_present_flag;
    unsigned char cabac_init_present_flag;
    unsigned char pps_slice_chroma_qp_offsets_present_flag;

    unsigned char deblocking_filter_override_enabled_flag;
    unsigned char pps_deblocking_filter_disabled_flag;
    signed char   pps_beta_offset_div2;
    signed char   pps_tc_offset_div2;
    unsigned char tiles_enabled_flag;
    unsigned char uniform_spacing_flag;
    unsigned char num_tile_columns_minus1;
    unsigned char num_tile_rows_minus1;

    unsigned short column_width_minus1[21];
    unsigned short row_height_minus1[21];
    unsigned int   reserved3[15];

    /* RefPicSets */
    int NumBitsForShortTermRPSInSlice;
    int NumDeltaPocsOfRefRpsIdx;
    int NumPocTotalCurr;
    int NumPocStCurrBefore;
    int NumPocStCurrAfter;
    int NumPocLtCurr;
    int CurrPicOrderCntVal;
    int RefPicIdx[16];                      /**< [refpic] Indices of valid reference pictures (-1 if unused for reference) */
    int PicOrderCntVal[16];                 /**< [refpic] */
    unsigned char IsLongTerm[16];           /**< [refpic] 0=not a long-term reference, 1=long-term reference */
    unsigned char RefPicSetStCurrBefore[8]; /**< [0..NumPocStCurrBefore-1] -> refpic (0..15) */
    unsigned char RefPicSetStCurrAfter[8];  /**< [0..NumPocStCurrAfter-1] -> refpic (0..15) */
    unsigned char RefPicSetLtCurr[8];       /**< [0..NumPocLtCurr-1] -> refpic (0..15) */
    unsigned char RefPicSetInterLayer0[8];
    unsigned char RefPicSetInterLayer1[8];
    unsigned int  reserved4[12];

    /* scaling lists (diag order) */
    unsigned char ScalingList4x4[6][16];        /**< [matrixId][i] */
    unsigned char ScalingList8x8[6][64];        /**< [matrixId][i] */
    unsigned char ScalingList16x16[6][64];      /**< [matrixId][i] */
    unsigned char ScalingList32x32[2][64];      /**< [matrixId][i] */
    unsigned char ScalingListDCCoeff16x16[6];   /**< [matrixId] */
    unsigned char ScalingListDCCoeff32x32[2];   /**< [matrixId] */
} CUVIDHEVCPICPARAMS;


/*!
 * \struct CUVIDVP8PICPARAMS
 * VP8 Picture Parameters
 */
typedef struct _CUVIDVP8PICPARAMS
{
    int width;
    int height;
    unsigned int first_partition_size;
    /* Frame Indexes */
    unsigned char LastRefIdx;
    unsigned char GoldenRefIdx;
    unsigned char AltRefIdx;
    /* Frame tag flags, readable field by field or as the single byte wFrameTagFlags */
    union {
        struct {
            unsigned char frame_type : 1;                   /**< 0 = KEYFRAME, 1 = INTERFRAME */
            unsigned char version : 3;
            unsigned char show_frame : 1;
            unsigned char update_mb_segmentation_data : 1;  /**< Must be 0 if segmentation is not enabled */
            unsigned char Reserved2Bits : 2;
        };
        unsigned char wFrameTagFlags;
    };
    unsigned char Reserved1[4];
    unsigned int  Reserved2[3];
} CUVIDVP8PICPARAMS;

/*!
 * \struct CUVIDVP9PICPARAMS
 * VP9 Picture Parameters
 */
typedef struct _CUVIDVP9PICPARAMS
{
    unsigned int width;
    unsigned int height;

    /* Frame Indices */
    unsigned char LastRefIdx;
    unsigned char GoldenRefIdx;
    unsigned char AltRefIdx;
    unsigned char colorSpace;

    /* Frame header flags: 16 bits of bit-fields in total */
    unsigned short profile : 3;
    unsigned short frameContextIdx : 2;
    unsigned short frameType : 1;
    unsigned short showFrame : 1;
    unsigned short errorResilient : 1;
    unsigned short frameParallelDecoding : 1;
    unsigned short subSamplingX : 1;
    unsigned short subSamplingY : 1;
    unsigned short intraOnly : 1;
    unsigned short allow_high_precision_mv : 1;
    unsigned short refreshEntropyProbs : 1;
    unsigned short reserved2Bits : 2;

    unsigned short reserved16Bits;

    unsigned char refFrameSignBias[4];

    unsigned char bitDepthMinus8Luma;
    unsigned char bitDepthMinus8Chroma;
    unsigned char loopFilterLevel;
    unsigned char loopFilterSharpness;

    unsigned char modeRefLfEnabled;
    unsigned char log2_tile_columns;
    unsigned char log2_tile_rows;

    /* Segmentation flags: 8 bits of bit-fields in total */
    unsigned char segmentEnabled : 1;
    unsigned char segmentMapUpdate : 1;
    unsigned char segmentMapTemporalUpdate : 1;
    unsigned char segmentFeatureMode : 1;
    unsigned char reserved4Bits : 4;


    unsigned char segmentFeatureEnable[8][4];
    short         segmentFeatureData[8][4];
    unsigned char mb_segment_tree_probs[7];
    unsigned char segment_pred_probs[3];
    unsigned char reservedSegment16Bits[2];

    int qpYAc;
    int qpYDc;
    int qpChDc;
    int qpChAc;

    unsigned int activeRefIdx[3];
    unsigned int resetFrameContext;
    unsigned int mcomp_filter_type;
    unsigned int mbRefLfDelta[4];
    unsigned int mbModeLfDelta[2];
    unsigned int frameTagSize;
    unsigned int offsetToDctParts;
    unsigned int reserved128Bits[4];

} CUVIDVP9PICPARAMS;
2410 2411 /*! 2412 * \struct CUVIDPICPARAMS 2413 * Picture Parameters for Decoding 2414 */ 2415 typedef struct _CUVIDPICPARAMS 2416 { 2417 int PicWidthInMbs; /**< Coded Frame Size */ 2418 int FrameHeightInMbs; /**< Coded Frame Height */ 2419 int CurrPicIdx; /**< Output index of the current picture */ 2420 int field_pic_flag; /**< 0=frame picture, 1=field picture */ 2421 int bottom_field_flag; /**< 0=top field, 1=bottom field (ignored if field_pic_flag=0) */ 2422 int second_field; /**< Second field of a complementary field pair */ 2423 // Bitstream data 2424 unsigned int nBitstreamDataLen; /**< Number of bytes in bitstream data buffer */ 2425 const unsigned char *pBitstreamData; /**< Ptr to bitstream data for this picture (slice-layer) */ 2426 unsigned int nNumSlices; /**< Number of slices in this picture */ 2427 const unsigned int *pSliceDataOffsets; /**< nNumSlices entries, contains offset of each slice within the bitstream data buffer */ 2428 int ref_pic_flag; /**< This picture is a reference picture */ 2429 int intra_pic_flag; /**< This picture is entirely intra coded */ 2430 unsigned int Reserved[30]; /**< Reserved for future use */ 2431 // Codec-specific data 2432 union { 2433 CUVIDMPEG2PICPARAMS mpeg2; /**< Also used for MPEG-1 */ 2434 CUVIDH264PICPARAMS h264; 2435 CUVIDVC1PICPARAMS vc1; 2436 CUVIDMPEG4PICPARAMS mpeg4; 2437 CUVIDJPEGPICPARAMS jpeg; 2438 CUVIDHEVCPICPARAMS hevc; 2439 CUVIDVP8PICPARAMS vp8; 2440 CUVIDVP9PICPARAMS vp9; 2441 unsigned int CodecReserved[1024]; 2442 } CodecSpecific; 2443 } CUVIDPICPARAMS; 2444 2445 2446 /*! 
2447 * \struct CUVIDPROCPARAMS 2448 * Picture Parameters for Postprocessing 2449 */ 2450 typedef struct _CUVIDPROCPARAMS 2451 { 2452 int progressive_frame; /**< Input is progressive (deinterlace_mode will be ignored) */ 2453 int second_field; /**< Output the second field (ignored if deinterlace mode is Weave) */ 2454 int top_field_first; /**< Input frame is top field first (1st field is top, 2nd field is bottom) */ 2455 int unpaired_field; /**< Input only contains one field (2nd field is invalid) */ 2456 // The fields below are used for raw YUV input 2457 unsigned int reserved_flags; /**< Reserved for future use (set to zero) */ 2458 unsigned int reserved_zero; /**< Reserved (set to zero) */ 2459 unsigned long long raw_input_dptr; /**< Input CUdeviceptr for raw YUV extensions */ 2460 unsigned int raw_input_pitch; /**< pitch in bytes of raw YUV input (should be aligned appropriately) */ 2461 unsigned int raw_input_format; /**< Reserved for future use (set to zero) */ 2462 unsigned long long raw_output_dptr; /**< Reserved for future use (set to zero) */ 2463 unsigned int raw_output_pitch; /**< Reserved for future use (set to zero) */ 2464 unsigned int Reserved[48]; 2465 void *Reserved3[3]; 2466 } CUVIDPROCPARAMS; 2467 2468 2469 /** 2470 * 2471 * In order to minimize decode latencies, there should be always at least 2 pictures in the decode 2472 * queue at any time, in order to make sure that all decode engines are always busy. 2473 * 2474 * Overall data flow: 2475 * - cuvidCreateDecoder(...) 2476 * For each picture: 2477 * - cuvidDecodePicture(N) 2478 * - cuvidMapVideoFrame(N-4) 2479 * - do some processing in cuda 2480 * - cuvidUnmapVideoFrame(N-4) 2481 * - cuvidDecodePicture(N+1) 2482 * - cuvidMapVideoFrame(N-3) 2483 * ... 2484 * - cuvidDestroyDecoder(...) 2485 * 2486 * NOTE: 2487 * - When the cuda context is created from a D3D device, the D3D device must also be created 2488 * with the D3DCREATE_MULTITHREADED flag. 
2489 * - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces) 2490 * - cuVidDecodePicture may block the calling thread if there are too many pictures pending 2491 * in the decode queue 2492 */ 2493 2494 /** 2495 * \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci) 2496 * Create the decoder object 2497 */ 2498 typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci); 2499 2500 /** 2501 * \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder) 2502 * Destroy the decoder object 2503 */ 2504 typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder); 2505 2506 /** 2507 * \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams) 2508 * Decode a single picture (field or frame) 2509 */ 2510 typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams); 2511 2512 2513 #if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL) 2514 /** 2515 * \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); 2516 * Post-process and map a video frame for use in cuda 2517 */ 2518 typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, 2519 unsigned int *pDevPtr, unsigned int *pPitch, 2520 CUVIDPROCPARAMS *pVPP); 2521 2522 /** 2523 * \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr) 2524 * Unmap a previously mapped video frame 2525 */ 2526 typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr); 2527 #endif 2528 2529 #if defined(WIN64) || defined(_WIN64) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) 2530 /** 2531 * \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); 
2532 * map a video frame 2533 */ 2534 typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, 2535 unsigned int *pPitch, CUVIDPROCPARAMS *pVPP); 2536 2537 /** 2538 * \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr); 2539 * Unmap a previously mapped video frame 2540 */ 2541 typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr); 2542 2543 #if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL) 2544 #define tcuvidMapVideoFrame tcuvidMapVideoFrame64 2545 #define tcuvidUnmapVideoFrame tcuvidUnmapVideoFrame64 2546 #endif 2547 #endif 2548 2549 2550 2551 /** 2552 * 2553 * Context-locking: to facilitate multi-threaded implementations, the following 4 functions 2554 * provide a simple mutex-style host synchronization. If a non-NULL context is specified 2555 * in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given 2556 * context before making any cuda calls. 2557 * A multi-threaded application could create a lock associated with a context handle so that 2558 * multiple threads can safely share the same cuda context: 2559 * - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context 2560 * that can be passed to cuvidCtxLockCreate. 2561 * - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section. 2562 * 2563 * NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video 2564 * decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls). 
2565 */ 2566 2567 /** 2568 * \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx) 2569 */ 2570 typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx); 2571 2572 /** 2573 * \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck) 2574 */ 2575 typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck); 2576 2577 /** 2578 * \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags) 2579 */ 2580 typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags); 2581 2582 /** 2583 * \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags) 2584 */ 2585 typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags); 2586 2587 /** @} */ /* End VIDEO_DECODER */ 2588 //////////////////////////////////////////////////////////////////////////////////////////////// 2589 2590 2591 extern tcuvidCreateDecoder *cuvidCreateDecoder; 2592 extern tcuvidDestroyDecoder *cuvidDestroyDecoder; 2593 extern tcuvidDecodePicture *cuvidDecodePicture; 2594 extern tcuvidMapVideoFrame *cuvidMapVideoFrame; 2595 extern tcuvidUnmapVideoFrame *cuvidUnmapVideoFrame; 2596 2597 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) 2598 extern tcuvidMapVideoFrame64 *cuvidMapVideoFrame64; 2599 extern tcuvidUnmapVideoFrame64 *cuvidUnmapVideoFrame64; 2600 #endif 2601 2602 //extern tcuvidGetVideoFrameSurface *cuvidGetVideoFrameSurface; 2603 2604 extern tcuvidCtxLockCreate *cuvidCtxLockCreate; 2605 extern tcuvidCtxLockDestroy *cuvidCtxLockDestroy; 2606 extern tcuvidCtxLock *cuvidCtxLock; 2607 extern tcuvidCtxUnlock *cuvidCtxUnlock; 2608 2609 //////////////////////////////////////////////////////////////////////////////////////////////// 2610 // 2611 // High-level helper APIs for video sources 2612 // 2613 2614 typedef void *CUvideosource; 2615 typedef void *CUvideoparser; 2616 typedef long long CUvideotimestamp; 2617 2618 /** 2619 * 
\addtogroup VIDEO_PARSER Video Parser 2620 * @{ 2621 */ 2622 2623 /*! 2624 * \enum cudaVideoState 2625 * Video Source State 2626 */ 2627 typedef enum { 2628 cudaVideoState_Error = -1, /**< Error state (invalid source) */ 2629 cudaVideoState_Stopped = 0, /**< Source is stopped (or reached end-of-stream) */ 2630 cudaVideoState_Started = 1 /**< Source is running and delivering data */ 2631 } cudaVideoState; 2632 2633 /*! 2634 * \enum cudaAudioCodec 2635 * Audio compression 2636 */ 2637 typedef enum { 2638 cudaAudioCodec_MPEG1=0, /**< MPEG-1 Audio */ 2639 cudaAudioCodec_MPEG2, /**< MPEG-2 Audio */ 2640 cudaAudioCodec_MP3, /**< MPEG-1 Layer III Audio */ 2641 cudaAudioCodec_AC3, /**< Dolby Digital (AC3) Audio */ 2642 cudaAudioCodec_LPCM /**< PCM Audio */ 2643 } cudaAudioCodec; 2644 2645 /*! 2646 * \struct CUVIDEOFORMAT 2647 * Video format 2648 */ 2649 typedef struct 2650 { 2651 cudaVideoCodec codec; /**< Compression format */ 2652 /** 2653 * frame rate = numerator / denominator (for example: 30000/1001) 2654 */ 2655 struct { 2656 unsigned int numerator; /**< frame rate numerator (0 = unspecified or variable frame rate) */ 2657 unsigned int denominator; /**< frame rate denominator (0 = unspecified or variable frame rate) */ 2658 } frame_rate; 2659 unsigned char progressive_sequence; /**< 0=interlaced, 1=progressive */ 2660 unsigned char bit_depth_luma_minus8; /**< high bit depth Luma */ 2661 unsigned char bit_depth_chroma_minus8; /**< high bit depth Chroma */ 2662 unsigned char reserved1; /**< Reserved for future use */ 2663 unsigned int coded_width; /**< coded frame width */ 2664 unsigned int coded_height; /**< coded frame height */ 2665 /** 2666 * area of the frame that should be displayed 2667 * typical example: 2668 * coded_width = 1920, coded_height = 1088 2669 * display_area = { 0,0,1920,1080 } 2670 */ 2671 struct { 2672 int left; /**< left position of display rect */ 2673 int top; /**< top position of display rect */ 2674 int right; /**< right position of display 
rect */ 2675 int bottom; /**< bottom position of display rect */ 2676 } display_area; 2677 cudaVideoChromaFormat chroma_format; /**< Chroma format */ 2678 unsigned int bitrate; /**< video bitrate (bps, 0=unknown) */ 2679 /** 2680 * Display Aspect Ratio = x:y (4:3, 16:9, etc) 2681 */ 2682 struct { 2683 int x; 2684 int y; 2685 } display_aspect_ratio; 2686 /** 2687 * Video Signal Description 2688 */ 2689 struct { 2690 unsigned char video_format : 3; 2691 unsigned char video_full_range_flag : 1; 2692 unsigned char reserved_zero_bits : 4; 2693 unsigned char color_primaries; 2694 unsigned char transfer_characteristics; 2695 unsigned char matrix_coefficients; 2696 } video_signal_description; 2697 unsigned int seqhdr_data_length; /**< Additional bytes following (CUVIDEOFORMATEX) */ 2698 } CUVIDEOFORMAT; 2699 2700 /*! 2701 * \struct CUVIDEOFORMATEX 2702 * Video format including raw sequence header information 2703 */ 2704 typedef struct 2705 { 2706 CUVIDEOFORMAT format; 2707 unsigned char raw_seqhdr_data[1024]; 2708 } CUVIDEOFORMATEX; 2709 2710 /*! 2711 * \struct CUAUDIOFORMAT 2712 * Audio Formats 2713 */ 2714 typedef struct 2715 { 2716 cudaAudioCodec codec; /**< Compression format */ 2717 unsigned int channels; /**< number of audio channels */ 2718 unsigned int samplespersec; /**< sampling frequency */ 2719 unsigned int bitrate; /**< For uncompressed, can also be used to determine bits per sample */ 2720 unsigned int reserved1; /**< Reserved for future use */ 2721 unsigned int reserved2; /**< Reserved for future use */ 2722 } CUAUDIOFORMAT; 2723 2724 2725 /*! 2726 * \enum CUvideopacketflags 2727 * Data packet flags 2728 */ 2729 typedef enum { 2730 CUVID_PKT_ENDOFSTREAM = 0x01, /**< Set when this is the last packet for this stream */ 2731 CUVID_PKT_TIMESTAMP = 0x02, /**< Timestamp is valid */ 2732 CUVID_PKT_DISCONTINUITY = 0x04 /**< Set when a discontinuity has to be signalled */ 2733 } CUvideopacketflags; 2734 2735 /*! 
2736 * \struct CUVIDSOURCEDATAPACKET 2737 * Data Packet 2738 */ 2739 typedef struct _CUVIDSOURCEDATAPACKET 2740 { 2741 unsigned long flags; /**< Combination of CUVID_PKT_XXX flags */ 2742 unsigned long payload_size; /**< number of bytes in the payload (may be zero if EOS flag is set) */ 2743 const unsigned char *payload; /**< Pointer to packet payload data (may be NULL if EOS flag is set) */ 2744 CUvideotimestamp timestamp; /**< Presentation timestamp (10MHz clock), only valid if CUVID_PKT_TIMESTAMP flag is set */ 2745 } CUVIDSOURCEDATAPACKET; 2746 2747 // Callback for packet delivery 2748 typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *); 2749 2750 /*! 2751 * \struct CUVIDSOURCEPARAMS 2752 * Source Params 2753 */ 2754 typedef struct _CUVIDSOURCEPARAMS 2755 { 2756 unsigned int ulClockRate; /**< Timestamp units in Hz (0=default=10000000Hz) */ 2757 unsigned int uReserved1[7]; /**< Reserved for future use - set to zero */ 2758 void *pUserData; /**< Parameter passed in to the data handlers */ 2759 PFNVIDSOURCECALLBACK pfnVideoDataHandler; /**< Called to deliver audio packets */ 2760 PFNVIDSOURCECALLBACK pfnAudioDataHandler; /**< Called to deliver video packets */ 2761 void *pvReserved2[8]; /**< Reserved for future use - set to NULL */ 2762 } CUVIDSOURCEPARAMS; 2763 2764 /*! 
2765 * \enum CUvideosourceformat_flags 2766 * CUvideosourceformat_flags 2767 */ 2768 typedef enum { 2769 CUVID_FMT_EXTFORMATINFO = 0x100 /**< Return extended format structure (CUVIDEOFORMATEX) */ 2770 } CUvideosourceformat_flags; 2771 2772 #if !defined(__APPLE__) 2773 /** 2774 * \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams) 2775 * Create Video Source 2776 */ 2777 typedef CUresult CUDAAPI tcuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams); 2778 2779 /** 2780 * \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams) 2781 * Create Video Source 2782 */ 2783 typedef CUresult CUDAAPI tcuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams); 2784 2785 /** 2786 * \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj) 2787 * Destroy Video Source 2788 */ 2789 typedef CUresult CUDAAPI tcuvidDestroyVideoSource(CUvideosource obj); 2790 2791 /** 2792 * \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state) 2793 * Set Video Source state 2794 */ 2795 typedef CUresult CUDAAPI tcuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state); 2796 2797 /** 2798 * \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj) 2799 * Get Video Source state 2800 */ 2801 typedef cudaVideoState CUDAAPI tcuvidGetVideoSourceState(CUvideosource obj); 2802 2803 /** 2804 * \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags) 2805 * Get Video Source Format 2806 */ 2807 typedef CUresult CUDAAPI tcuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags); 2808 2809 /** 2810 * \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags) 2811 * Set Video Source state 2812 */ 2813 typedef 
CUresult CUDAAPI tcuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags); 2814 2815 #endif 2816 2817 /** 2818 * \struct CUVIDPARSERDISPINFO 2819 */ 2820 typedef struct _CUVIDPARSERDISPINFO 2821 { 2822 int picture_index; /**< */ 2823 int progressive_frame; /**< */ 2824 int top_field_first; /**< */ 2825 int repeat_first_field; /**< Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, -1=unpaired field) */ 2826 CUvideotimestamp timestamp; /**< */ 2827 } CUVIDPARSERDISPINFO; 2828 2829 // 2830 // Parser callbacks 2831 // The parser will call these synchronously from within cuvidParseVideoData(), whenever a picture is ready to 2832 // be decoded and/or displayed. 2833 // 2834 typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *); 2835 typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *); 2836 typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *); 2837 2838 /** 2839 * \struct CUVIDPARSERPARAMS 2840 */ 2841 typedef struct _CUVIDPARSERPARAMS 2842 { 2843 cudaVideoCodec CodecType; /**< cudaVideoCodec_XXX */ 2844 unsigned int ulMaxNumDecodeSurfaces; /**< Max # of decode surfaces (parser will cycle through these) */ 2845 unsigned int ulClockRate; /**< Timestamp units in Hz (0=default=10000000Hz) */ 2846 unsigned int ulErrorThreshold; /**< % Error threshold (0-100) for calling pfnDecodePicture (100=always call pfnDecodePicture even if picture bitstream is fully corrupted) */ 2847 unsigned int ulMaxDisplayDelay; /**< Max display queue delay (improves pipelining of decode with display) - 0=no delay (recommended values: 2..4) */ 2848 unsigned int uReserved1[5]; /**< Reserved for future use - set to 0 */ 2849 void *pUserData; /**< User data for callbacks */ 2850 PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< Called before decoding frames and/or whenever there is a format change */ 2851 PFNVIDDECODECALLBACK pfnDecodePicture; /**< Called when a picture is ready to be decoded 
(decode order) */ 2852 PFNVIDDISPLAYCALLBACK pfnDisplayPicture; /**< Called whenever a picture is ready to be displayed (display order) */ 2853 void *pvReserved2[7]; /**< Reserved for future use - set to NULL */ 2854 CUVIDEOFORMATEX *pExtVideoInfo; /**< [Optional] sequence header data from system layer */ 2855 } CUVIDPARSERPARAMS; 2856 2857 /** 2858 * \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams) 2859 */ 2860 typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams); 2861 2862 /** 2863 * \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket) 2864 */ 2865 typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket); 2866 2867 /** 2868 * \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj) 2869 */ 2870 typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj); 2871 2872 #if !defined(__APPLE__) 2873 extern tcuvidCreateVideoSource *cuvidCreateVideoSource; 2874 extern tcuvidCreateVideoSourceW *cuvidCreateVideoSourceW; 2875 extern tcuvidDestroyVideoSource *cuvidDestroyVideoSource; 2876 extern tcuvidSetVideoSourceState *cuvidSetVideoSourceState; 2877 extern tcuvidGetVideoSourceState *cuvidGetVideoSourceState; 2878 extern tcuvidGetSourceVideoFormat *cuvidGetSourceVideoFormat; 2879 extern tcuvidGetSourceAudioFormat *cuvidGetSourceAudioFormat; 2880 #endif 2881 2882 2883 extern tcuvidCreateVideoParser *cuvidCreateVideoParser; 2884 extern tcuvidParseVideoData *cuvidParseVideoData; 2885 extern tcuvidDestroyVideoParser *cuvidDestroyVideoParser; 2886 2887 /** @} */ /* END VIDEO_PARSER */ 2888 //////////////////////////////////////////////////////////////////////////////////////////////// 2889 2890 const char *cudaGetErrorEnum(CUresult error); 2891 2892 #ifdef __cplusplus 2893 } 2894 #endif 2895 2896 #endif // defined(WIN32) || defined(GPAC_CONFIG_LINUX) 2897 2898 #endif //__cuda_tools_h__ 2899 2900