1 /*
2  * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
3  *
4  * Please refer to the NVIDIA end user license agreement (EULA) associated
5  * with this source code for terms and conditions that govern your use of
6  * this software. Any use, reproduction, disclosure, or distribution of
7  * this software and related documentation outside the terms of the EULA
8  * is strictly prohibited.
9  *
10  */
11 
12 #ifndef __cuda_tools_h__
13 #define __cuda_tools_h__
14 
15 #include <stdlib.h>
16 
17 #if defined(WIN32) || defined(GPAC_CONFIG_LINUX) || defined(GPAC_CONFIG_DARWIN)
18 
19 #ifdef __cplusplus
20 extern "C" {
21 #endif
22 
23 //needed for dec_nvdec_sdk.h which uses GL prototypes
24 #ifndef GPAC_DISABLE_3D
25 #include "../compositor/gl_inc.h"
26 #else
27 typedef u32 GLuint;
28 typedef u32 GLenum;
29 #endif
30 
/* Default to API version 4000 (4.0) unless the includer already chose one. */
#if !defined(__CUDA_API_VERSION)
#  define __CUDA_API_VERSION 4000
#endif
34 
35 /**
36  * \defgroup CUDA_DRIVER CUDA Driver API
37  *
38  * This section describes the low-level CUDA driver application programming
39  * interface.
40  *
41  * @{
42  */
43 
44 /**
45  * \defgroup CUDA_TYPES Data types used by CUDA driver
46  * @{
47  */
48 
/**
 * Version number of the CUDA API described here (4000 stands for 4.0)
 */
#define CUDA_VERSION 4000
53 
/**
 * CUDA device pointer
 *
 * Declared as a 64-bit integer on every 64-bit target and as a 32-bit integer
 * otherwise. The generic data-model macros (_WIN64, __LP64__) are checked in
 * addition to the architecture-specific ones so that 64-bit targets other than
 * x86_64/aarch64 do not silently get a truncated 32-bit device pointer.
 */
#if __CUDA_API_VERSION >= 3020

#if defined(_WIN64) || defined(__LP64__) || \
    defined(__x86_64) || defined(__x86_64__) || defined(AMD64) || defined(_M_AMD64) || \
    defined(__aarch64__) || defined(_M_ARM64)
    typedef unsigned long long CUdeviceptr;
#else
    typedef unsigned int CUdeviceptr;
#endif

#endif /* __CUDA_API_VERSION >= 3020 */
66 
/** CUDA device */
typedef int CUdevice;

/** CUDA context */
typedef struct CUctx_st *CUcontext;

/** CUDA module */
typedef struct CUmod_st *CUmodule;

/** CUDA function */
typedef struct CUfunc_st *CUfunction;

/** CUDA array */
typedef struct CUarray_st *CUarray;

/** CUDA texture reference */
typedef struct CUtexref_st *CUtexref;

/** CUDA surface reference */
typedef struct CUsurfref_st *CUsurfref;

/** CUDA event */
typedef struct CUevent_st *CUevent;

/** CUDA stream */
typedef struct CUstream_st *CUstream;

/** CUDA graphics interop resource */
typedef struct CUgraphicsResource_st *CUgraphicsResource;
77 
/** CUDA definition of UUID: a 16-byte array */
struct CUuuid_st
{
    char bytes[16];
};
typedef struct CUuuid_st CUuuid;
82 
/**
 * Flags accepted when creating a context
 */
enum CUctx_flags_enum
{
    /** Automatic scheduling */
    CU_CTX_SCHED_AUTO = 0,
    /** Set spin as default scheduling */
    CU_CTX_SCHED_SPIN = 1,
    /** Set yield as default scheduling */
    CU_CTX_SCHED_YIELD = 2,
    /** Set blocking synchronization as default scheduling */
    CU_CTX_SCHED_BLOCKING_SYNC = 4,
    /** Same value as ::CU_CTX_SCHED_BLOCKING_SYNC \deprecated */
    CU_CTX_BLOCKING_SYNC = 4,
    /** Support mapped pinned allocations */
    CU_CTX_MAP_HOST = 8,
    /** Keep local memory allocation after launch */
    CU_CTX_LMEM_RESIZE_TO_MAX = 16,

    /* The mask values and the primary-context flag depend on the API version. */
#if __CUDA_API_VERSION >= 4000
    CU_CTX_SCHED_MASK = 0x07,
    /** Initialize and return the primary context */
    CU_CTX_PRIMARY = 0x20,
    CU_CTX_FLAGS_MASK = 0x3f
#else
    CU_CTX_SCHED_MASK = 0x03,
    CU_CTX_FLAGS_MASK = 0x1f
#endif
};
typedef enum CUctx_flags_enum CUctx_flags;
104 
/**
 * Flags accepted when creating an event
 */
enum CUevent_flags_enum
{
    /** Default event flag */
    CU_EVENT_DEFAULT = 0x0,
    /** Event uses blocking synchronization */
    CU_EVENT_BLOCKING_SYNC = 0x1,
    /** Event will not record timing data */
    CU_EVENT_DISABLE_TIMING = 0x2
};
typedef enum CUevent_flags_enum CUevent_flags;
114 
/**
 * Element formats of a CUDA array
 */
enum CUarray_format_enum
{
    /* Unsigned integers */
    CU_AD_FORMAT_UNSIGNED_INT8 = 1,   /**< 8-bit unsigned */
    CU_AD_FORMAT_UNSIGNED_INT16 = 2,  /**< 16-bit unsigned */
    CU_AD_FORMAT_UNSIGNED_INT32 = 3,  /**< 32-bit unsigned */

    /* Signed integers */
    CU_AD_FORMAT_SIGNED_INT8 = 8,     /**< 8-bit signed */
    CU_AD_FORMAT_SIGNED_INT16 = 9,    /**< 16-bit signed */
    CU_AD_FORMAT_SIGNED_INT32 = 10,   /**< 32-bit signed */

    /* Floating point */
    CU_AD_FORMAT_HALF = 16,           /**< 16-bit floating point */
    CU_AD_FORMAT_FLOAT = 32           /**< 32-bit floating point */
};
typedef enum CUarray_format_enum CUarray_format;
129 
/**
 * Addressing modes of a texture reference
 */
enum CUaddress_mode_enum
{
    /** Wrapping address mode */
    CU_TR_ADDRESS_MODE_WRAP = 0x0,
    /** Clamp to edge address mode */
    CU_TR_ADDRESS_MODE_CLAMP = 0x1,
    /** Mirror address mode */
    CU_TR_ADDRESS_MODE_MIRROR = 0x2,
    /** Border address mode */
    CU_TR_ADDRESS_MODE_BORDER = 0x3
};
typedef enum CUaddress_mode_enum CUaddress_mode;
140 
/**
 * Filtering modes of a texture reference
 */
enum CUfilter_mode_enum
{
    /** Point filter mode */
    CU_TR_FILTER_MODE_POINT = 0x0,
    /** Linear filter mode */
    CU_TR_FILTER_MODE_LINEAR = 0x1
};
typedef enum CUfilter_mode_enum CUfilter_mode;
149 
/**
 * Queryable device attributes
 */
enum CUdevice_attribute_enum
{
    /* ---- Block and grid limits ---- */

    /** Maximum number of threads per block */
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0x01,
    /** Maximum block dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 0x02,
    /** Maximum block dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 0x03,
    /** Maximum block dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 0x04,
    /** Maximum grid dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 0x05,
    /** Maximum grid dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 0x06,
    /** Maximum grid dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 0x07,

    /* ---- Memory, registers and clocks ---- */

    /** Maximum shared memory available per block in bytes */
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 0x08,
    /** Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 0x08,
    /** Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 0x09,
    /** Warp size in threads */
    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 0x0a,
    /** Maximum pitch in bytes allowed by memory copies */
    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 0x0b,
    /** Maximum number of 32-bit registers available per block */
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 0x0c,
    /** Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 0x0c,
    /** Peak clock frequency in kilohertz */
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 0x0d,
    /** Alignment requirement for textures */
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 0x0e,

    /* ---- Capabilities ---- */

    /** Device can possibly copy memory and execute a kernel concurrently */
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 0x0f,
    /** Number of multiprocessors on device */
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 0x10,
    /** Specifies whether there is a run time limit on kernels */
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 0x11,
    /** Device is integrated with host memory */
    CU_DEVICE_ATTRIBUTE_INTEGRATED = 0x12,
    /** Device can map host memory into CUDA address space */
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 0x13,
    /** Compute mode (See ::CUcomputemode for details) */
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 0x14,

    /* ---- Texture and surface limits ---- */

    /** Maximum 1D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 0x15,
    /** Maximum 2D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 0x16,
    /** Maximum 2D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 0x17,
    /** Maximum 3D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 0x18,
    /** Maximum 3D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 0x19,
    /** Maximum 3D texture depth */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 0x1a,
    /** Maximum texture array width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 0x1b,
    /** Maximum texture array height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 0x1c,
    /** Maximum slices in a texture array */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 0x1d,
    /** Alignment requirement for surfaces */
    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 0x1e,

    /* ---- Miscellaneous ---- */

    /** Device can possibly execute multiple kernels concurrently */
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 0x1f,
    /** Device has ECC support enabled */
    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 0x20,
    /** PCI bus ID of the device */
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 0x21,
    /** PCI device ID of the device */
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 0x22,
    /** Device is using TCC driver model */
    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 0x23
#if __CUDA_API_VERSION >= 4000
    /* Attributes only declared for API version 4000 and later; the leading
     * comma continues the list without leaving a trailing comma otherwise. */
    ,
    /** Peak memory clock frequency in kilohertz */
    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 0x24,
    /** Global memory bus width in bits */
    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 0x25,
    /** Size of L2 cache in bytes */
    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 0x26,
    /** Maximum resident threads per multiprocessor */
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 0x27,
    /** Number of asynchronous engines */
    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 0x28,
    /** Device shares a unified address space with the host */
    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 0x29,
    /** Maximum 1D layered texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 0x2a,
    /** Maximum layers in a 1D layered texture */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 0x2b
#endif
};
typedef enum CUdevice_attribute_enum CUdevice_attribute;
203 
/**
 * Legacy device properties
 */
struct CUdevprop_st
{
    /** Maximum number of threads per block */
    int maxThreadsPerBlock;
    /** Maximum size of each dimension of a block */
    int maxThreadsDim[3];
    /** Maximum size of each dimension of a grid */
    int maxGridSize[3];
    /** Shared memory available per block in bytes */
    int sharedMemPerBlock;
    /** Constant memory available on device in bytes */
    int totalConstantMemory;
    /** Warp size in threads */
    int SIMDWidth;
    /** Maximum pitch in bytes allowed by memory copies */
    int memPitch;
    /** 32-bit registers available per block */
    int regsPerBlock;
    /** Clock frequency in kilohertz */
    int clockRate;
    /** Alignment requirement for textures */
    int textureAlign;
};
typedef struct CUdevprop_st CUdevprop;
220 
/**
 * Queryable function attributes
 */
enum CUfunction_attribute_enum
{
    /**
     * Maximum number of threads per block; a launch with more threads would
     * fail. Depends on both the function and the device on which the
     * function is currently loaded.
     */
    CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0x0,

    /**
     * Bytes of statically-allocated shared memory required by the function.
     * Dynamically-allocated shared memory requested by the user at runtime
     * is not included.
     */
    CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 0x1,

    /** Bytes of user-allocated constant memory required by the function. */
    CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 0x2,

    /** Bytes of local memory used by each thread of the function. */
    CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 0x3,

    /** Number of registers used by each thread of the function. */
    CU_FUNC_ATTRIBUTE_NUM_REGS = 0x4,

    /**
     * PTX virtual architecture version the function was compiled for,
     * encoded as major * 10 + minor (PTX version 1.3 yields 13). May be the
     * undefined value 0 for cubins compiled prior to CUDA 3.0.
     */
    CU_FUNC_ATTRIBUTE_PTX_VERSION = 0x5,

    /**
     * Binary architecture version the function was compiled for, encoded as
     * major * 10 + minor (binary version 1.3 yields 13). Legacy cubins
     * without a properly-encoded binary architecture version yield 10.
     */
    CU_FUNC_ATTRIBUTE_BINARY_VERSION = 0x6,

    /* Left without an explicit value: takes the value following the last attribute. */
    CU_FUNC_ATTRIBUTE_MAX
};
typedef enum CUfunction_attribute_enum CUfunction_attribute;
276 
/**
 * Cache configuration preferences of a function
 */
enum CUfunc_cache_enum
{
    /** No preference for shared memory or L1 (default) */
    CU_FUNC_CACHE_PREFER_NONE = 0,
    /** Prefer larger shared memory and smaller L1 cache */
    CU_FUNC_CACHE_PREFER_SHARED = 1,
    /** Prefer larger L1 cache and smaller shared memory */
    CU_FUNC_CACHE_PREFER_L1 = 2
};
typedef enum CUfunc_cache_enum CUfunc_cache;
286 
/**
 * Kinds of memory
 */
enum CUmemorytype_enum
{
    /** Host memory */
    CU_MEMORYTYPE_HOST = 1,
    /** Device memory */
    CU_MEMORYTYPE_DEVICE = 2,
    /** Array memory */
    CU_MEMORYTYPE_ARRAY = 3
#if __CUDA_API_VERSION >= 4000
    ,
    /** Unified device or host memory */
    CU_MEMORYTYPE_UNIFIED = 4
#endif
};
typedef enum CUmemorytype_enum CUmemorytype;
299 
/**
 * Compute modes of a device
 */
enum CUcomputemode_enum
{
    /** Default compute mode (Multiple contexts allowed per device) */
    CU_COMPUTEMODE_DEFAULT = 0x0,
    /** Compute-exclusive-thread mode (Only one context used by a single thread can be present on this device at a time) */
    CU_COMPUTEMODE_EXCLUSIVE = 0x1,
    /** Compute-prohibited mode (No contexts can be created on this device at this time) */
    CU_COMPUTEMODE_PROHIBITED = 0x2
#if __CUDA_API_VERSION >= 4000
    ,
    /** Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time) */
    CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 0x3
#endif
};
typedef enum CUcomputemode_enum CUcomputemode;
312 
/**
 * Options of the online (JIT) compiler
 */
enum CUjit_option_enum
{
    /**
     * Max number of registers that a thread may use.\n
     * Option type: unsigned int
     */
    CU_JIT_MAX_REGISTERS = 0,

    /**
     * IN: Minimum number of threads per block to target compilation for\n
     * OUT: Number of threads the compiler actually targeted.
     * Restricts the resource utilization of the compiler (e.g. max
     * registers) so that a block with the given number of threads should be
     * able to launch based on register limitations. No other resource
     * limitation, such as shared memory utilization, is currently taken
     * into account.\n
     * Option type: unsigned int
     */
    CU_JIT_THREADS_PER_BLOCK = 1,

    /**
     * Returns in the option a float value of the wall clock time, in
     * milliseconds, spent creating the cubin\n
     * Option type: float
     */
    CU_JIT_WALL_TIME = 2,

    /**
     * Pointer to a buffer receiving the informational log messages from
     * PTXAS (buffer size is given via ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES)\n
     * Option type: char*
     */
    CU_JIT_INFO_LOG_BUFFER = 3,

    /**
     * IN: Log buffer size in bytes; log messages are capped at this size
     * (including null terminator)\n
     * OUT: Amount of log buffer filled with messages\n
     * Option type: unsigned int
     */
    CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4,

    /**
     * Pointer to a buffer receiving the error log messages from PTXAS
     * (buffer size is given via ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n
     * Option type: char*
     */
    CU_JIT_ERROR_LOG_BUFFER = 5,

    /**
     * IN: Log buffer size in bytes; log messages are capped at this size
     * (including null terminator)\n
     * OUT: Amount of log buffer filled with messages\n
     * Option type: unsigned int
     */
    CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6,

    /**
     * Level of optimizations to apply to generated code (0 - 4); 4 is both
     * the default and the highest level.\n
     * Option type: unsigned int
     */
    CU_JIT_OPTIMIZATION_LEVEL = 7,

    /**
     * Determines the target based on the current attached context
     * (default)\n
     * Option type: No option value needed
     */
    CU_JIT_TARGET_FROM_CUCONTEXT = 8,

    /**
     * Target is chosen based on supplied ::CUjit_target_enum.\n
     * Option type: unsigned int for enumerated type ::CUjit_target_enum
     */
    CU_JIT_TARGET = 9,

    /**
     * Fallback strategy to use if no matching cubin is found, chosen based
     * on supplied ::CUjit_fallback_enum.\n
     * Option type: unsigned int for enumerated type ::CUjit_fallback_enum
     */
    CU_JIT_FALLBACK_STRATEGY = 10
};
typedef enum CUjit_option_enum CUjit_option;
404 
/**
 * Targets of the online compiler
 */
enum CUjit_target_enum
{
    /** Compute device class 1.0 */
    CU_TARGET_COMPUTE_10 = 0,
    /** Compute device class 1.1 */
    CU_TARGET_COMPUTE_11 = 1,
    /** Compute device class 1.2 */
    CU_TARGET_COMPUTE_12 = 2,
    /** Compute device class 1.3 */
    CU_TARGET_COMPUTE_13 = 3,
    /** Compute device class 2.0 */
    CU_TARGET_COMPUTE_20 = 4,
    /** Compute device class 2.1 */
    CU_TARGET_COMPUTE_21 = 5
};
typedef enum CUjit_target_enum CUjit_target;
417 
/**
 * Fallback strategies used when matching a cubin
 */
enum CUjit_fallback_enum
{
    /** Prefer to compile ptx */
    CU_PREFER_PTX = 0,
    /** Prefer to fall back to compatible binary code */
    CU_PREFER_BINARY = 1
};
typedef enum CUjit_fallback_enum CUjit_fallback;
428 
/**
 * Flags accepted when registering a graphics resource
 */
enum CUgraphicsRegisterFlags_enum
{
    CU_GRAPHICS_REGISTER_FLAGS_NONE = 0,
    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 1,
    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 2,
    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 4
};
typedef enum CUgraphicsRegisterFlags_enum CUgraphicsRegisterFlags;
439 
/**
 * Flags accepted when mapping and unmapping interop resources
 */
enum CUgraphicsMapResourceFlags_enum
{
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 1,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 2
};
typedef enum CUgraphicsMapResourceFlags_enum CUgraphicsMapResourceFlags;
449 
/**
 * Array indices of the faces of a cubemap
 */
enum CUarray_cubemap_face_enum
{
    /** Positive X face of cubemap */
    CU_CUBEMAP_FACE_POSITIVE_X = 0,
    /** Negative X face of cubemap */
    CU_CUBEMAP_FACE_NEGATIVE_X = 1,
    /** Positive Y face of cubemap */
    CU_CUBEMAP_FACE_POSITIVE_Y = 2,
    /** Negative Y face of cubemap */
    CU_CUBEMAP_FACE_NEGATIVE_Y = 3,
    /** Positive Z face of cubemap */
    CU_CUBEMAP_FACE_POSITIVE_Z = 4,
    /** Negative Z face of cubemap */
    CU_CUBEMAP_FACE_NEGATIVE_Z = 5
};
typedef enum CUarray_cubemap_face_enum CUarray_cubemap_face;
462 
/**
 * Configurable limits
 */
enum CUlimit_enum
{
    /** GPU thread stack size */
    CU_LIMIT_STACK_SIZE = 0,
    /** GPU printf FIFO size */
    CU_LIMIT_PRINTF_FIFO_SIZE = 1,
    /** GPU malloc heap size */
    CU_LIMIT_MALLOC_HEAP_SIZE = 2
};
typedef enum CUlimit_enum CUlimit;
472 
/**
 * Result codes returned by API calls
 */
enum cudaError_enum
{
    /* ---- 0-8: general status and profiler ---- */

    /**
     * No error. For query calls (see ::cuEventQuery() and ::cuStreamQuery())
     * this can also mean that the queried operation is complete.
     */
    CUDA_SUCCESS = 0,

    /** One or more parameters passed to the API call is outside the acceptable range of values. */
    CUDA_ERROR_INVALID_VALUE = 1,

    /** The API call could not allocate enough memory to perform the requested operation. */
    CUDA_ERROR_OUT_OF_MEMORY = 2,

    /** The CUDA driver has not been initialized with ::cuInit(), or initialization has failed. */
    CUDA_ERROR_NOT_INITIALIZED = 3,

    /** The CUDA driver is in the process of shutting down. */
    CUDA_ERROR_DEINITIALIZED = 4,

    /** Profiling APIs were called while the application is running in visual profiler mode. */
    CUDA_ERROR_PROFILER_DISABLED = 5,

    /** Profiling has not been initialized for this context; call cuProfilerInitialize() to resolve this. */
    CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,

    /** The profiler has already been started; cuProfilerStart() is probably called incorrectly. */
    CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,

    /** The profiler has already been stopped; cuProfilerStop() is probably called incorrectly. */
    CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,

    /* ---- 100-101: device selection ---- */

    /** No CUDA-capable devices were detected by the installed CUDA driver. */
    CUDA_ERROR_NO_DEVICE = 100,

    /** The device ordinal supplied by the user does not correspond to a valid CUDA device. */
    CUDA_ERROR_INVALID_DEVICE = 101,

    /* ---- 200-216: images, contexts and resource mapping ---- */

    /** The device kernel image is invalid. This can also indicate an invalid CUDA module. */
    CUDA_ERROR_INVALID_IMAGE = 200,

    /**
     * Most frequently: no context is bound to the current thread. Also
     * returned when the context passed to an API call is not a valid handle
     * (such as a context that has had ::cuCtxDestroy() invoked on it), or
     * when a user mixes different API versions (i.e. 3010 context with 3020
     * API calls). See ::cuCtxGetApiVersion() for more details.
     */
    CUDA_ERROR_INVALID_CONTEXT = 201,

    /**
     * The context supplied as a parameter to the API call was already the
     * active context.
     * \deprecated
     * Deprecated as of CUDA 3.2: attempting to push the active context via
     * ::cuCtxPushCurrent() is no longer an error.
     */
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,

    /** A map or register operation has failed. */
    CUDA_ERROR_MAP_FAILED = 205,

    /** An unmap or unregister operation has failed. */
    CUDA_ERROR_UNMAP_FAILED = 206,

    /** The specified array is currently mapped and thus cannot be destroyed. */
    CUDA_ERROR_ARRAY_IS_MAPPED = 207,

    /** The resource is already mapped. */
    CUDA_ERROR_ALREADY_MAPPED = 208,

    /**
     * No kernel image suitable for the device is available. This can occur
     * when a user specifies code generation options for a particular CUDA
     * source file that do not include the corresponding device
     * configuration.
     */
    CUDA_ERROR_NO_BINARY_FOR_GPU = 209,

    /** A resource has already been acquired. */
    CUDA_ERROR_ALREADY_ACQUIRED = 210,

    /** A resource is not mapped. */
    CUDA_ERROR_NOT_MAPPED = 211,

    /** A mapped resource is not available for access as an array. */
    CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,

    /** A mapped resource is not available for access as a pointer. */
    CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,

    /** An uncorrectable ECC error was detected during execution. */
    CUDA_ERROR_ECC_UNCORRECTABLE = 214,

    /** The ::CUlimit passed to the API call is not supported by the active device. */
    CUDA_ERROR_UNSUPPORTED_LIMIT = 215,

    /**
     * The ::CUcontext passed to the API call can only be bound to a single
     * CPU thread at a time but is already bound to a CPU thread.
     */
    CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,

    /* ---- 300-304: sources, files, shared objects, OS ---- */

    /** The device kernel source is invalid. */
    CUDA_ERROR_INVALID_SOURCE = 300,

    /** The file specified was not found. */
    CUDA_ERROR_FILE_NOT_FOUND = 301,

    /** A link to a shared object failed to resolve. */
    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,

    /** Initialization of a shared object failed. */
    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,

    /** An OS call failed. */
    CUDA_ERROR_OPERATING_SYSTEM = 304,

    /* ---- 400-600: handles, symbols, readiness ---- */

    /**
     * A resource handle passed to the API call was not valid. Resource
     * handles are opaque types like ::CUstream and ::CUevent.
     */
    CUDA_ERROR_INVALID_HANDLE = 400,

    /**
     * A named symbol was not found. Examples of symbols are global/constant
     * variable names, texture names, and surface names.
     */
    CUDA_ERROR_NOT_FOUND = 500,

    /**
     * Asynchronous operations issued previously have not completed yet. Not
     * actually an error, but it must be indicated differently than
     * ::CUDA_SUCCESS (which indicates completion). Calls that may return
     * this value include ::cuEventQuery() and ::cuStreamQuery().
     */
    CUDA_ERROR_NOT_READY = 600,

    /* ---- 700-709: launches, peer access, context state ---- */

    /**
     * An exception occurred on the device while executing a kernel. Common
     * causes include dereferencing an invalid device pointer and accessing
     * out of bounds shared memory. The context cannot be used, so it must be
     * destroyed (and a new one should be created). All existing device
     * memory allocations from this context are invalid and must be
     * reconstructed if the program is to continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_FAILED = 700,

    /**
     * A launch did not occur because it did not have appropriate resources.
     * Usually the user has attempted to pass too many arguments to the
     * device kernel, or the kernel launch specifies too many threads for the
     * kernel's register count. Passing arguments of the wrong size (i.e. a
     * 64-bit pointer when a 32-bit int is expected) is equivalent to passing
     * too many arguments and can also result in this error.
     */
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,

    /**
     * The device kernel took too long to execute. Only possible if timeouts
     * are enabled - see the device attribute
     * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The
     * context cannot be used (and must be destroyed similar to
     * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations
     * from this context are invalid and must be reconstructed if the program
     * is to continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_TIMEOUT = 702,

    /** A kernel launch uses an incompatible texturing mode. */
    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,

    /**
     * A call to ::cuCtxEnablePeerAccess() is trying to re-enable peer access
     * to a context which has already had peer access to it enabled.
     */
    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,

    /**
     * A call to ::cuMemPeerRegister is trying to register memory from a
     * context which has not had peer access enabled yet via
     * ::cuCtxEnablePeerAccess(), or ::cuCtxDisablePeerAccess() is trying to
     * disable peer access which has not been enabled yet.
     */
    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,

    /** A call to ::cuMemPeerRegister is trying to register already-registered memory. */
    CUDA_ERROR_PEER_MEMORY_ALREADY_REGISTERED = 706,

    /** A call to ::cuMemPeerUnregister is trying to unregister memory that has not been registered. */
    CUDA_ERROR_PEER_MEMORY_NOT_REGISTERED = 707,

    /**
     * ::cuCtxCreate was called with the flag ::CU_CTX_PRIMARY on a device
     * which already has initialized its primary context.
     */
    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,

    /**
     * The context current to the calling thread has been destroyed using
     * ::cuCtxDestroy, or is a primary context which has not yet been
     * initialized.
     */
    CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,

    /* ---- 999: catch-all ---- */

    /** An unknown internal error has occurred. */
    CUDA_ERROR_UNKNOWN = 999
};
typedef enum cudaError_enum CUresult;
770 
/*
 * Flag bits for host and peer memory allocation/registration. The whole
 * group is compiled only when __CUDA_API_VERSION >= 4000. PORTABLE (0x01)
 * and DEVICEMAP (0x02) carry the same value in the cuMemHostAlloc and
 * cuMemHostRegister families.
 */
#if __CUDA_API_VERSION >= 4000
/**
 * If set, host memory is portable between CUDA contexts.
 * Flag for ::cuMemHostAlloc()
 */
#define CU_MEMHOSTALLOC_PORTABLE        0x01

/**
 * If set, host memory is mapped into CUDA address space and
 * ::cuMemHostGetDevicePointer() may be called on the host pointer.
 * Flag for ::cuMemHostAlloc()
 */
#define CU_MEMHOSTALLOC_DEVICEMAP       0x02

/**
 * If set, host memory is allocated as write-combined - fast to write,
 * faster to DMA, slow to read except via SSE4 streaming load instruction
 * (MOVNTDQA).
 * Flag for ::cuMemHostAlloc()
 */
#define CU_MEMHOSTALLOC_WRITECOMBINED   0x04

/**
 * If set, host memory is portable between CUDA contexts.
 * Flag for ::cuMemHostRegister()
 */
#define CU_MEMHOSTREGISTER_PORTABLE     0x01

/**
 * If set, host memory is mapped into CUDA address space and
 * ::cuMemHostGetDevicePointer() may be called on the host pointer.
 * Flag for ::cuMemHostRegister()
 */
#define CU_MEMHOSTREGISTER_DEVICEMAP    0x02

/**
 * If set, peer memory is mapped into CUDA address space and
 * ::cuMemPeerGetDevicePointer() may be called on the host pointer.
 * Flag for ::cuMemPeerRegister()
 */
/* NOTE(review): the text above says "host pointer" although this flag is
 * about peer memory; the wording looks carried over from
 * CU_MEMHOSTREGISTER_DEVICEMAP - confirm against the driver documentation. */
#define CU_MEMPEERREGISTER_DEVICEMAP    0x02
#endif /* __CUDA_API_VERSION >= 4000 */
813 
814 #if __CUDA_API_VERSION >= 3020
815 /**
816  * 2D memory copy parameters
817  */
818 typedef struct CUDA_MEMCPY2D_st
819 {
820     size_t srcXInBytes;         /**< Source X in bytes */
821     size_t srcY;                /**< Source Y */
822 
823     CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
824     const void *srcHost;        /**< Source host pointer */
825     CUdeviceptr srcDevice;      /**< Source device pointer */
826     CUarray srcArray;           /**< Source array reference */
827     size_t srcPitch;            /**< Source pitch (ignored when src is array) */
828 
829     size_t dstXInBytes;         /**< Destination X in bytes */
830     size_t dstY;                /**< Destination Y */
831 
832     CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
833     void *dstHost;              /**< Destination host pointer */
834     CUdeviceptr dstDevice;      /**< Destination device pointer */
835     CUarray dstArray;           /**< Destination array reference */
836     size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
837 
838     size_t WidthInBytes;        /**< Width of 2D memory copy in bytes */
839     size_t Height;              /**< Height of 2D memory copy */
840 } CUDA_MEMCPY2D;
841 
842 /**
843  * 3D memory copy parameters
844  */
845 typedef struct CUDA_MEMCPY3D_st
846 {
847     size_t srcXInBytes;         /**< Source X in bytes */
848     size_t srcY;                /**< Source Y */
849     size_t srcZ;                /**< Source Z */
850     size_t srcLOD;              /**< Source LOD */
851     CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
852     const void *srcHost;        /**< Source host pointer */
853     CUdeviceptr srcDevice;      /**< Source device pointer */
854     CUarray srcArray;           /**< Source array reference */
855     void *reserved0;            /**< Must be NULL */
856     size_t srcPitch;            /**< Source pitch (ignored when src is array) */
857     size_t srcHeight;           /**< Source height (ignored when src is array; may be 0 if Depth==1) */
858 
859     size_t dstXInBytes;         /**< Destination X in bytes */
860     size_t dstY;                /**< Destination Y */
861     size_t dstZ;                /**< Destination Z */
862     size_t dstLOD;              /**< Destination LOD */
863     CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
864     void *dstHost;              /**< Destination host pointer */
865     CUdeviceptr dstDevice;      /**< Destination device pointer */
866     CUarray dstArray;           /**< Destination array reference */
867     void *reserved1;            /**< Must be NULL */
868     size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
869     size_t dstHeight;           /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
870 
871     size_t WidthInBytes;        /**< Width of 3D memory copy in bytes */
872     size_t Height;              /**< Height of 3D memory copy */
873     size_t Depth;               /**< Depth of 3D memory copy */
874 } CUDA_MEMCPY3D;
875 
876 /**
877  * 3D memory cross-context copy parameters
878  */
879 typedef struct CUDA_MEMCPY3D_PEER_st
880 {
881     size_t srcXInBytes;         /**< Source X in bytes */
882     size_t srcY;                /**< Source Y */
883     size_t srcZ;                /**< Source Z */
884     size_t srcLOD;              /**< Source LOD */
885     CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
886     const void *srcHost;        /**< Source host pointer */
887     CUdeviceptr srcDevice;      /**< Source device pointer */
888     CUarray srcArray;           /**< Source array reference */
889     CUcontext srcContext;       /**< Source context (ignored with srcMemoryType is ::CU_MEMORYTYPE_ARRAY) */
890     size_t srcPitch;            /**< Source pitch (ignored when src is array) */
891     size_t srcHeight;           /**< Source height (ignored when src is array; may be 0 if Depth==1) */
892 
893     size_t dstXInBytes;         /**< Destination X in bytes */
894     size_t dstY;                /**< Destination Y */
895     size_t dstZ;                /**< Destination Z */
896     size_t dstLOD;              /**< Destination LOD */
897     CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
898     void *dstHost;              /**< Destination host pointer */
899     CUdeviceptr dstDevice;      /**< Destination device pointer */
900     CUarray dstArray;           /**< Destination array reference */
901     CUcontext dstContext;       /**< Destination context (ignored with dstMemoryType is ::CU_MEMORYTYPE_ARRAY) */
902     size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
903     size_t dstHeight;           /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */
904 
905     size_t WidthInBytes;        /**< Width of 3D memory copy in bytes */
906     size_t Height;              /**< Height of 3D memory copy */
907     size_t Depth;               /**< Depth of 3D memory copy */
908 } CUDA_MEMCPY3D_PEER;
909 
910 /**
911  * Array descriptor
912  */
913 typedef struct CUDA_ARRAY_DESCRIPTOR_st
914 {
915     size_t Width;             /**< Width of array */
916     size_t Height;            /**< Height of array */
917 
918     CUarray_format Format;    /**< Array format */
919     unsigned int NumChannels; /**< Channels per array element */
920 } CUDA_ARRAY_DESCRIPTOR;
921 
922 /**
923  * 3D array descriptor
924  */
925 typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
926 {
927     size_t Width;             /**< Width of 3D array */
928     size_t Height;            /**< Height of 3D array */
929     size_t Depth;             /**< Depth of 3D array */
930 
931     CUarray_format Format;    /**< Array format */
932     unsigned int NumChannels; /**< Channels per array element */
933     unsigned int Flags;       /**< Flags */
934 } CUDA_ARRAY3D_DESCRIPTOR;
935 
936 #endif /* __CUDA_API_VERSION >= 3020 */
937 
938 /**
939  * If set, the CUDA array is a collection of layers, where each layer is either a 1D
940  * or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number
941  * of layers, not the depth of a 3D array.
942  */
943 #define CUDA_ARRAY3D_LAYERED        0x01
944 
945 /**
946  * Deprecated, use CUDA_ARRAY3D_LAYERED
947  */
948 #define CUDA_ARRAY3D_2DARRAY        0x01
949 
950 /**
951  * This flag must be set in order to bind a surface reference
952  * to the CUDA array
953  */
954 #define CUDA_ARRAY3D_SURFACE_LDST   0x02
955 
956 /**
957  * Override the texref format with a format inferred from the array.
958  * Flag for ::cuTexRefSetArray()
959  */
960 #define CU_TRSA_OVERRIDE_FORMAT 0x01
961 
962 /**
963  * Read the texture as integers rather than promoting the values to floats
964  * in the range [0,1].
965  * Flag for ::cuTexRefSetFlags()
966  */
967 #define CU_TRSF_READ_AS_INTEGER         0x01
968 
969 /**
970  * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
971  * Flag for ::cuTexRefSetFlags()
972  */
973 #define CU_TRSF_NORMALIZED_COORDINATES  0x02
974 
975 /**
976  * Perform sRGB->linear conversion during texture read.
977  * Flag for ::cuTexRefSetFlags()
978  */
979 #define CU_TRSF_SRGB  0x10
980 
981 /**
982  * End of array terminator for the \p extra parameter to
983  * ::cuLaunchKernel
984  */
985 #define CU_LAUNCH_PARAM_END            ((void*)0x00)
986 
987 /**
988  * Indicator that the next value in the \p extra parameter to
989  * ::cuLaunchKernel will be a pointer to a buffer containing all kernel
990  * parameters used for launching kernel \p f.  This buffer needs to
991  * honor all alignment/padding requirements of the individual parameters.
992  * If ::CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the
993  * \p extra array, then ::CU_LAUNCH_PARAM_BUFFER_POINTER will have no
994  * effect.
995  */
996 #define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)
997 
998 /**
999  * Indicator that the next value in the \p extra parameter to
1000  * ::cuLaunchKernel will be a pointer to a size_t which contains the
1001  * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER.
1002  * It is required that ::CU_LAUNCH_PARAM_BUFFER_POINTER also be specified
1003  * in the \p extra array if the value associated with
1004  * ::CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
1005  */
1006 #define CU_LAUNCH_PARAM_BUFFER_SIZE    ((void*)0x02)
1007 
1008 /**
1009  * For texture references loaded into the module, use default texunit from
1010  * texture reference.
1011  */
1012 #define CU_PARAM_TR_DEFAULT -1
1013 
1014 /**
1015  * CUDA API made obselete at API version 3020
1016  */
1017 #if defined(__CUDA_API_VERSION_INTERNAL)
1018     #define CUdeviceptr                  CUdeviceptr_v1
1019     #define CUDA_MEMCPY2D_st             CUDA_MEMCPY2D_v1_st
1020     #define CUDA_MEMCPY2D                CUDA_MEMCPY2D_v1
1021     #define CUDA_MEMCPY3D_st             CUDA_MEMCPY3D_v1_st
1022     #define CUDA_MEMCPY3D                CUDA_MEMCPY3D_v1
1023     #define CUDA_ARRAY_DESCRIPTOR_st     CUDA_ARRAY_DESCRIPTOR_v1_st
1024     #define CUDA_ARRAY_DESCRIPTOR        CUDA_ARRAY_DESCRIPTOR_v1
1025     #define CUDA_ARRAY3D_DESCRIPTOR_st   CUDA_ARRAY3D_DESCRIPTOR_v1_st
1026     #define CUDA_ARRAY3D_DESCRIPTOR      CUDA_ARRAY3D_DESCRIPTOR_v1
1027 #endif /* CUDA_FORCE_LEGACY32_INTERNAL */
1028 
1029 #if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020
/** Legacy CUDA device pointer: always a 32-bit unsigned int in this pre-3020 / internal branch */
typedef unsigned int CUdeviceptr;
1031 
/**
 * Legacy 2D memory copy parameters
 *
 * Same members as the size_t-based CUDA_MEMCPY2D, but every offset, pitch
 * and extent is an unsigned int. Compiled only when
 * __CUDA_API_VERSION_INTERNAL is defined or __CUDA_API_VERSION < 3020.
 */
typedef struct CUDA_MEMCPY2D_st
{
    unsigned int srcXInBytes;   /**< Source X in bytes */
    unsigned int srcY;          /**< Source Y */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    unsigned int srcPitch;      /**< Source pitch (ignored when src is array) */

    unsigned int dstXInBytes;   /**< Destination X in bytes */
    unsigned int dstY;          /**< Destination Y */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    unsigned int dstPitch;      /**< Destination pitch (ignored when dst is array) */

    unsigned int WidthInBytes;  /**< Width of 2D memory copy in bytes */
    unsigned int Height;        /**< Height of 2D memory copy */
} CUDA_MEMCPY2D;
1053 
/**
 * Legacy 3D memory copy parameters
 *
 * Same members as the size_t-based CUDA_MEMCPY3D, but every offset, pitch
 * and extent is an unsigned int. Compiled only when
 * __CUDA_API_VERSION_INTERNAL is defined or __CUDA_API_VERSION < 3020.
 */
typedef struct CUDA_MEMCPY3D_st
{
    unsigned int srcXInBytes;   /**< Source X in bytes */
    unsigned int srcY;          /**< Source Y */
    unsigned int srcZ;          /**< Source Z */
    unsigned int srcLOD;        /**< Source LOD */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    void *reserved0;            /**< Must be NULL */
    unsigned int srcPitch;      /**< Source pitch (ignored when src is array) */
    unsigned int srcHeight;     /**< Source height (ignored when src is array; may be 0 if Depth==1) */

    unsigned int dstXInBytes;   /**< Destination X in bytes */
    unsigned int dstY;          /**< Destination Y */
    unsigned int dstZ;          /**< Destination Z */
    unsigned int dstLOD;        /**< Destination LOD */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    void *reserved1;            /**< Must be NULL */
    unsigned int dstPitch;      /**< Destination pitch (ignored when dst is array) */
    unsigned int dstHeight;     /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */

    unsigned int WidthInBytes;  /**< Width of 3D memory copy in bytes */
    unsigned int Height;        /**< Height of 3D memory copy */
    unsigned int Depth;         /**< Depth of 3D memory copy */
} CUDA_MEMCPY3D;
1084 
/**
 * Legacy array descriptor
 *
 * Same members as the size_t-based CUDA_ARRAY_DESCRIPTOR, with unsigned int
 * width and height.
 */
typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
    unsigned int Width;         /**< Width of array */
    unsigned int Height;        /**< Height of array */

    CUarray_format Format;      /**< Array format */
    unsigned int NumChannels;   /**< Channels per array element */
} CUDA_ARRAY_DESCRIPTOR;
1093 
/**
 * Legacy 3D array descriptor
 *
 * Same members as the size_t-based CUDA_ARRAY3D_DESCRIPTOR, with unsigned
 * int width, height and depth.
 */
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
    unsigned int Width;         /**< Width of 3D array */
    unsigned int Height;        /**< Height of 3D array */
    unsigned int Depth;         /**< Depth of 3D array */

    CUarray_format Format;      /**< Array format */
    unsigned int NumChannels;   /**< Channels per array element */
    unsigned int Flags;         /**< Flags */
} CUDA_ARRAY3D_DESCRIPTOR;
1104 
1105 #endif /* (__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020 */
1106 
1107 /*
1108  * If set, the CUDA array contains an array of 2D slices
1109  * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
1110  * the number of slices, not the depth of a 3D array.
1111  */
1112 #define CUDA_ARRAY3D_2DARRAY        0x01
1113 
1114 /**
1115  * This flag must be set in order to bind a surface reference
1116  * to the CUDA array
1117  */
1118 #define CUDA_ARRAY3D_SURFACE_LDST   0x02
1119 
1120 /**
1121  * Override the texref format with a format inferred from the array.
1122  * Flag for ::cuTexRefSetArray()
1123  */
1124 #define CU_TRSA_OVERRIDE_FORMAT 0x01
1125 
1126 /**
1127  * Read the texture as integers rather than promoting the values to floats
1128  * in the range [0,1].
1129  * Flag for ::cuTexRefSetFlags()
1130  */
1131 #define CU_TRSF_READ_AS_INTEGER         0x01
1132 
1133 /**
1134  * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
1135  * Flag for ::cuTexRefSetFlags()
1136  */
1137 #define CU_TRSF_NORMALIZED_COORDINATES  0x02
1138 
1139 /**
1140  * Perform sRGB->linear conversion during texture read.
1141  * Flag for ::cuTexRefSetFlags()
1142  */
1143 #define CU_TRSF_SRGB  0x10
1144 
1145 /**
1146  * For texture references loaded into the module, use default texunit from
1147  * texture reference.
1148  */
1149 #define CU_PARAM_TR_DEFAULT -1
1150 
1151 /** @} */ /* END CUDA_TYPES */
1152 
/*
 * Calling-convention decoration applied to every driver function type
 * declared below: __stdcall when any of the Windows target macros is
 * defined, empty otherwise.
 */
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    #define CUDAAPI __stdcall
#else
    #define CUDAAPI
#endif
1158 
1159 /**
1160  * \defgroup CUDA_INITIALIZE Initialization
1161  *
1162  * This section describes the initialization functions of the low-level CUDA
1163  * driver application programming interface.
1164  *
1165  * @{
1166  */
1167 
1168 /*********************************
1169  ** Initialization
1170  *********************************/
1171 typedef CUresult  CUDAAPI tcuInit(unsigned int Flags);
1172 
1173 /*********************************
1174  ** Driver Version Query
1175  *********************************/
1176 typedef CUresult  CUDAAPI tcuDriverGetVersion(int *driverVersion);
1177 
1178 /************************************
1179  **
1180  **    Device management
1181  **
1182  ***********************************/
1183 
1184 typedef CUresult  CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
1185 typedef CUresult  CUDAAPI tcuDeviceGetCount(int *count);
1186 typedef CUresult  CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
1187 typedef CUresult  CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
1188 #if __CUDA_API_VERSION >= 3020
1189     typedef CUresult  CUDAAPI tcuDeviceTotalMem(size_t *bytes, CUdevice dev);
1190 #else
1191     typedef CUresult  CUDAAPI tcuDeviceTotalMem(unsigned int *bytes, CUdevice dev);
1192 #endif
1193 
1194 typedef CUresult  CUDAAPI tcuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
1195 typedef CUresult  CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
1196 
1197 /************************************
1198  **
1199  **    Context management
1200  **
1201  ***********************************/
1202 typedef CUresult  CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
1203 typedef CUresult  CUDAAPI tcuCtxDestroy(CUcontext ctx);
1204 typedef CUresult  CUDAAPI tcuCtxAttach(CUcontext *pctx, unsigned int flags);
1205 typedef CUresult  CUDAAPI tcuCtxDetach(CUcontext ctx);
1206 typedef CUresult  CUDAAPI tcuCtxPushCurrent(CUcontext ctx);
1207 typedef CUresult  CUDAAPI tcuCtxPopCurrent(CUcontext *pctx);
1208 
1209 typedef CUresult  CUDAAPI tcuCtxSetCurrent(CUcontext ctx);
1210 typedef CUresult  CUDAAPI tcuCtxGetCurrent(CUcontext *pctx);
1211 
1212 typedef CUresult  CUDAAPI tcuCtxGetDevice(CUdevice *device);
1213 typedef CUresult  CUDAAPI tcuCtxSynchronize(void);
1214 
1215 
1216 /************************************
1217  **
1218  **    Module management
1219  **
1220  ***********************************/
1221 typedef CUresult  CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname);
1222 typedef CUresult  CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image);
1223 typedef CUresult  CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues);
1224 typedef CUresult  CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin);
1225 typedef CUresult  CUDAAPI tcuModuleUnload(CUmodule hmod);
1226 typedef CUresult  CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name);
1227 
1228 #if __CUDA_API_VERSION >= 3020
1229     typedef CUresult  CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name);
1230 #else
1231     typedef CUresult  CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, CUmodule hmod, const char *name);
1232 #endif
1233 
1234 typedef CUresult  CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name);
1235 typedef CUresult  CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name);
1236 
1237 /************************************
1238  **
1239  **    Memory management
1240  **
1241  ***********************************/
1242 #if __CUDA_API_VERSION >= 3020
1243     typedef CUresult CUDAAPI tcuMemGetInfo(size_t *free, size_t *total);
1244     typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, size_t bytesize);
1245     typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr);
1246     typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr,
1247                                               size_t *pPitch,
1248                                               size_t WidthInBytes,
1249                                               size_t Height,
1250                                               // size of biggest r/w to be performed by kernels on this memory
1251                                               // 4, 8 or 16 bytes
1252                                               unsigned int ElementSizeBytes
1253                                              );
1254 #else
1255     typedef CUresult CUDAAPI tcuMemGetInfo(unsigned int *free, unsigned int *total);
1256     typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, unsigned int bytesize);
1257     typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, unsigned int *psize, CUdeviceptr dptr);
1258     typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr,
1259                                               unsigned int *pPitch,
1260                                               unsigned int WidthInBytes,
1261                                               unsigned int Height,
1262                                               // size of biggest r/w to be performed by kernels on this memory
1263                                               // 4, 8 or 16 bytes
1264                                               unsigned int ElementSizeBytes
1265                                              );
1266 #endif
1267 
1268 typedef CUresult CUDAAPI tcuMemFree(CUdeviceptr dptr);
1269 
1270 #if __CUDA_API_VERSION >= 3020
1271     typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, size_t bytesize);
1272 #else
1273     typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, unsigned int bytesize);
1274 #endif
1275 
1276 typedef CUresult CUDAAPI tcuMemFreeHost(void *p);
1277 typedef CUresult CUDAAPI tcuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags);
1278 
1279 typedef CUresult CUDAAPI tcuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags);
1280 typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int *pFlags, void *p);
1281 
1282 typedef CUresult CUDAAPI tcuMemHostRegister(void *p, size_t bytesize, unsigned int Flags);
1283 typedef CUresult CUDAAPI tcuMemHostUnregister(void *p);;
1284 typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount);
1285 typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount);
1286 
1287 /************************************
1288  **
1289  **    Synchronous Memcpy
1290  **
1291  ** Intra-device memcpy's done with these functions may execute in parallel with the CPU,
1292  ** but if host memory is involved, they wait until the copy is done before returning.
1293  **
1294  ***********************************/
1295 // 1D functions
1296 #if __CUDA_API_VERSION >= 3020
1297     // system <-> device memory
1298     typedef CUresult  CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount);
1299     typedef CUresult  CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount);
1300 
1301     // device <-> device memory
1302     typedef CUresult  CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount);
1303 
1304     // device <-> array memory
1305     typedef CUresult  CUDAAPI tcuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount);
1306     typedef CUresult  CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount);
1307 
1308     // system <-> array memory
1309     typedef CUresult  CUDAAPI tcuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount);
1310     typedef CUresult  CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount);
1311 
1312     // array <-> array memory
1313     typedef CUresult  CUDAAPI tcuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount);
1314 #else
1315     // system <-> device memory
1316     typedef CUresult  CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount);
1317     typedef CUresult  CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount);
1318 
1319     // device <-> device memory
1320     typedef CUresult  CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount);
1321 
1322     // device <-> array memory
1323     typedef CUresult  CUDAAPI tcuMemcpyDtoA(CUarray dstArray, unsigned int dstOffset, CUdeviceptr srcDevice, unsigned int ByteCount);
1324     typedef CUresult  CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
1325 
1326     // system <-> array memory
1327     typedef CUresult  CUDAAPI tcuMemcpyHtoA(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount);
1328     typedef CUresult  CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
1329 
1330     // array <-> array memory
1331     typedef CUresult  CUDAAPI tcuMemcpyAtoA(CUarray dstArray, unsigned int dstOffset, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
1332 #endif
1333 
1334 // 2D memcpy
1335 typedef CUresult  CUDAAPI tcuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
1336 typedef CUresult  CUDAAPI tcuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);
1337 
1338 // 3D memcpy
1339 typedef CUresult  CUDAAPI tcuMemcpy3D(const CUDA_MEMCPY3D *pCopy);
1340 
1341 /************************************
1342  **
1343  **    Asynchronous Memcpy
1344  **
1345  ** Any host memory involved must be DMA'able (e.g., allocated with cuMemAllocHost).
1346  ** memcpy's done with these functions execute in parallel with the CPU and, if
1347  ** the hardware is available, may execute in parallel with the GPU.
1348  ** Asynchronous memcpy must be accompanied by appropriate stream synchronization.
1349  **
1350  ***********************************/
1351 
1352 // 1D functions
1353 #if __CUDA_API_VERSION >= 3020
1354     // system <-> device memory
1355     typedef CUresult  CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice,
1356                                                  const void *srcHost, size_t ByteCount, CUstream hStream);
1357     typedef CUresult  CUDAAPI tcuMemcpyDtoHAsync(void *dstHost,
1358                                                  CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
1359 
1360     // device <-> device memory
1361     typedef CUresult  CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice,
1362                                                  CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);
1363 
1364     // system <-> array memory
1365     typedef CUresult  CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset,
1366                                                  const void *srcHost, size_t ByteCount, CUstream hStream);
1367     typedef CUresult  CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset,
1368                                                  size_t ByteCount, CUstream hStream);
1369 #else
1370     // system <-> device memory
1371     typedef CUresult  CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice,
1372                                                  const void *srcHost, unsigned int ByteCount, CUstream hStream);
1373     typedef CUresult  CUDAAPI tcuMemcpyDtoHAsync(void *dstHost,
1374                                                  CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream);
1375 
1376     // device <-> device memory
1377     typedef CUresult  CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice,
1378                                                  CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream);
1379 
1380     // system <-> array memory
1381     typedef CUresult  CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, unsigned int dstOffset,
1382                                                  const void *srcHost, unsigned int ByteCount, CUstream hStream);
1383     typedef CUresult  CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, unsigned int srcOffset,
1384                                                  unsigned int ByteCount, CUstream hStream);
1385 #endif
1386 
1387 // 2D memcpy
1388 typedef CUresult  CUDAAPI tcuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);
1389 
1390 // 3D memcpy
1391 typedef CUresult  CUDAAPI tcuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);
1392 
1393 /************************************
1394  **
1395  **    Memset
1396  **
1397  ***********************************/
1398 typedef CUresult  CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, unsigned int N);
1399 typedef CUresult  CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, unsigned int N);
1400 typedef CUresult  CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, unsigned int N);
1401 
1402 #if __CUDA_API_VERSION >= 3020
1403     typedef CUresult  CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, size_t Width, size_t Height);
1404     typedef CUresult  CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, size_t Width, size_t Height);
1405     typedef CUresult  CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, size_t Width, size_t Height);
1406 #else
1407     typedef CUresult  CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, unsigned int Width, unsigned int Height);
1408     typedef CUresult  CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, unsigned int Width, unsigned int Height);
1409     typedef CUresult  CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, unsigned int Width, unsigned int Height);
1410 #endif
1411 
1412 /************************************
1413  **
1414  **    Function management
1415  **
1416  ***********************************/
1417 
1418 
1419 typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
1420 typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
1421 typedef CUresult CUDAAPI tcuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
1422 typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
1423 typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f,
1424                                          unsigned int gridDimX,  unsigned int gridDimY,  unsigned int gridDimZ,
1425                                          unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
1426                                          unsigned int sharedMemBytes,
1427                                          CUstream hStream, void **kernelParams, void **extra);
1428 
1429 /************************************
1430  **
1431  **    Array management
1432  **
1433  ***********************************/
1434 
/* Signature types for the array entry points that take a 1D/2D descriptor
 * (CUDA_ARRAY_DESCRIPTOR). */
typedef CUresult  CUDAAPI tcuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
typedef CUresult  CUDAAPI tcuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
typedef CUresult  CUDAAPI tcuArrayDestroy(CUarray hArray);

/* Same pair of operations for the 3D descriptor (CUDA_ARRAY3D_DESCRIPTOR). */
typedef CUresult  CUDAAPI tcuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
typedef CUresult  CUDAAPI tcuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
1441 
1442 
1443 /************************************
1444  **
1445  **    Texture reference management
1446  **
1447  ***********************************/
/* Signature types for the texture-reference entry points. The matching
 * cuTexRefXxx function pointers are declared extern later in this header. */
typedef CUresult  CUDAAPI tcuTexRefCreate(CUtexref *pTexRef);
typedef CUresult  CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);

typedef CUresult  CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);

/* Byte offsets, byte counts and pitches are size_t from API version 3020 on
 * and unsigned int for older API versions. */
#if __CUDA_API_VERSION >= 3020
    typedef CUresult  CUDAAPI tcuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
    typedef CUresult  CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
#else
    typedef CUresult  CUDAAPI tcuTexRefSetAddress(unsigned int *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, unsigned int bytes);
    typedef CUresult  CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, unsigned int Pitch);
#endif

/* Setters for the remaining texture-reference state. */
typedef CUresult  CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
typedef CUresult  CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
typedef CUresult  CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
typedef CUresult  CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);

/* Getters: each one writes its result through the leading pointer argument(s). */
typedef CUresult  CUDAAPI tcuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
typedef CUresult  CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
typedef CUresult  CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
typedef CUresult  CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
typedef CUresult  CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
typedef CUresult  CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);
1472 
1473 /************************************
1474  **
1475  **    Surface reference management
1476  **
1477  ***********************************/
/* Signature types for binding an array to a surface reference and for reading
 * the bound array back. */
typedef CUresult  CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
typedef CUresult  CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);
1480 
1481 /************************************
1482  **
1483  **    Parameter management
1484  **
1485  ***********************************/
1486 
/* Signature types for the per-function parameter entry points. The setters
 * take an int offset plus an integer, float, raw buffer or texture reference. */
typedef CUresult  CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
typedef CUresult  CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
typedef CUresult  CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
typedef CUresult  CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
typedef CUresult  CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);
1492 
1493 
1494 /************************************
1495  **
1496  **    Launch functions
1497  **
1498  ***********************************/
1499 
/* Signature types for the launch entry points that take at most a 2D grid
 * size (and, for the Async variant, a stream). */
typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);
1503 
1504 /************************************
1505  **
1506  **    Events
1507  **
1508  ***********************************/
/* Signature types for the event entry points. */
typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int Flags);
typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventDestroy(CUevent hEvent);
/* Writes a float, named pMilliseconds, for the pair of events hStart/hEnd. */
typedef CUresult CUDAAPI tcuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);
1515 
1516 /************************************
1517  **
1518  **    Streams
1519  **
1520  ***********************************/
/* Signature types for the stream entry points. */
typedef CUresult CUDAAPI  tcuStreamCreate(CUstream *phStream, unsigned int Flags);
typedef CUresult CUDAAPI  tcuStreamQuery(CUstream hStream);
typedef CUresult CUDAAPI  tcuStreamSynchronize(CUstream hStream);
typedef CUresult CUDAAPI  tcuStreamDestroy(CUstream hStream);
1525 
1526 /************************************
1527  **
1528  **    Graphics interop
1529  **
1530  ***********************************/
/* Signature types for the graphics-API-agnostic resource entry points. */
typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);

/* The size output is size_t from API version 3020 on, unsigned int before. */
#if __CUDA_API_VERSION >= 3020
    typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource);
#else
    typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, unsigned int *pSize, CUgraphicsResource resource);
#endif

/* Map/unmap operate on an array of `count` resources and take a stream. */
typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
1543 
1544 /************************************
1545  **
1546  **    Export tables
1547  **
1548  ***********************************/
/* Signature type for cuGetExportTable: given a table id (CUuuid), writes a
 * const void pointer through ppExportTable. */
typedef CUresult CUDAAPI tcuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);
1550 
1551 /************************************
1552  **
1553  **    Limits
1554  **
1555  ***********************************/
1556 
/* Signature types for setting and reading a context limit; the value is a
 * size_t in both directions. */
typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
typedef CUresult CUDAAPI tcuCtxGetLimit(size_t *pvalue, CUlimit limit);
1559 
1560 
/*
 * Function pointers for the driver entry points, one per `tcuXxx` signature
 * type. They are only declared here (extern); the definitions live in another
 * translation unit.
 * NOTE(review): presumably these are resolved at run time by the loader behind
 * cuInit() and are not valid before it succeeds — confirm against the
 * implementation file.
 */
extern tcuDriverGetVersion             *cuDriverGetVersion;
extern tcuDeviceGet                    *cuDeviceGet;
extern tcuDeviceGetCount               *cuDeviceGetCount;
extern tcuDeviceGetName                *cuDeviceGetName;
extern tcuDeviceComputeCapability      *cuDeviceComputeCapability;
extern tcuDeviceGetProperties          *cuDeviceGetProperties;
extern tcuDeviceGetAttribute           *cuDeviceGetAttribute;
extern tcuCtxDestroy                   *cuCtxDestroy;
extern tcuCtxAttach                    *cuCtxAttach;
extern tcuCtxDetach                    *cuCtxDetach;
extern tcuCtxPushCurrent               *cuCtxPushCurrent;
extern tcuCtxPopCurrent                *cuCtxPopCurrent;

extern tcuCtxSetCurrent                *cuCtxSetCurrent;
extern tcuCtxGetCurrent                *cuCtxGetCurrent;

extern tcuCtxGetDevice                 *cuCtxGetDevice;
extern tcuCtxSynchronize               *cuCtxSynchronize;
extern tcuModuleLoad                   *cuModuleLoad;
extern tcuModuleLoadData               *cuModuleLoadData;
extern tcuModuleLoadDataEx             *cuModuleLoadDataEx;
extern tcuModuleLoadFatBinary          *cuModuleLoadFatBinary;
extern tcuModuleUnload                 *cuModuleUnload;
extern tcuModuleGetFunction            *cuModuleGetFunction;
extern tcuModuleGetTexRef              *cuModuleGetTexRef;
extern tcuModuleGetSurfRef             *cuModuleGetSurfRef;
extern tcuMemFreeHost                  *cuMemFreeHost;
extern tcuMemHostAlloc                 *cuMemHostAlloc;
extern tcuMemHostGetFlags              *cuMemHostGetFlags;

extern tcuMemHostRegister              *cuMemHostRegister;
extern tcuMemHostUnregister            *cuMemHostUnregister;
extern tcuMemcpy                       *cuMemcpy;
extern tcuMemcpyPeer                   *cuMemcpyPeer;

extern tcuDeviceTotalMem               *cuDeviceTotalMem;
extern tcuCtxCreate                    *cuCtxCreate;
extern tcuModuleGetGlobal              *cuModuleGetGlobal;
extern tcuMemGetInfo                   *cuMemGetInfo;
extern tcuMemAlloc                     *cuMemAlloc;
extern tcuMemAllocPitch                *cuMemAllocPitch;
extern tcuMemFree                      *cuMemFree;
extern tcuMemGetAddressRange           *cuMemGetAddressRange;
extern tcuMemAllocHost                 *cuMemAllocHost;
extern tcuMemHostGetDevicePointer      *cuMemHostGetDevicePointer;
extern tcuFuncSetBlockShape            *cuFuncSetBlockShape;
extern tcuFuncSetSharedSize            *cuFuncSetSharedSize;
extern tcuFuncGetAttribute             *cuFuncGetAttribute;
extern tcuFuncSetCacheConfig           *cuFuncSetCacheConfig;
extern tcuLaunchKernel                 *cuLaunchKernel;
extern tcuArrayDestroy                 *cuArrayDestroy;
extern tcuTexRefCreate                 *cuTexRefCreate;
extern tcuTexRefDestroy                *cuTexRefDestroy;
extern tcuTexRefSetArray               *cuTexRefSetArray;
extern tcuTexRefSetFormat              *cuTexRefSetFormat;
extern tcuTexRefSetAddressMode         *cuTexRefSetAddressMode;
extern tcuTexRefSetFilterMode          *cuTexRefSetFilterMode;
extern tcuTexRefSetFlags               *cuTexRefSetFlags;
extern tcuTexRefGetArray               *cuTexRefGetArray;
extern tcuTexRefGetAddressMode         *cuTexRefGetAddressMode;
extern tcuTexRefGetFilterMode          *cuTexRefGetFilterMode;
extern tcuTexRefGetFormat              *cuTexRefGetFormat;
extern tcuTexRefGetFlags               *cuTexRefGetFlags;
extern tcuSurfRefSetArray              *cuSurfRefSetArray;
extern tcuSurfRefGetArray              *cuSurfRefGetArray;
extern tcuParamSetSize                 *cuParamSetSize;
extern tcuParamSeti                    *cuParamSeti;
extern tcuParamSetf                    *cuParamSetf;
extern tcuParamSetv                    *cuParamSetv;
extern tcuParamSetTexRef               *cuParamSetTexRef;
extern tcuLaunch                       *cuLaunch;
extern tcuLaunchGrid                   *cuLaunchGrid;
extern tcuLaunchGridAsync              *cuLaunchGridAsync;
extern tcuEventCreate                  *cuEventCreate;
extern tcuEventRecord                  *cuEventRecord;
extern tcuEventQuery                   *cuEventQuery;
extern tcuEventSynchronize             *cuEventSynchronize;
extern tcuEventDestroy                 *cuEventDestroy;
extern tcuEventElapsedTime             *cuEventElapsedTime;
extern tcuStreamCreate                 *cuStreamCreate;
extern tcuStreamQuery                  *cuStreamQuery;
extern tcuStreamSynchronize            *cuStreamSynchronize;
extern tcuStreamDestroy                *cuStreamDestroy;
extern tcuGraphicsUnregisterResource   *cuGraphicsUnregisterResource;
extern tcuGraphicsSubResourceGetMappedArray  *cuGraphicsSubResourceGetMappedArray;
extern tcuGraphicsResourceSetMapFlags  *cuGraphicsResourceSetMapFlags;
extern tcuGraphicsMapResources         *cuGraphicsMapResources;
extern tcuGraphicsUnmapResources       *cuGraphicsUnmapResources;
extern tcuGetExportTable               *cuGetExportTable;
extern tcuCtxSetLimit                  *cuCtxSetLimit;
extern tcuCtxGetLimit                  *cuCtxGetLimit;
1652 
// These functions could be using the CUDA 3.2 interface (_v2).
// Their signature types above switch between size_t and unsigned int
// arguments depending on __CUDA_API_VERSION (threshold 3020), so the pointers
// below must be called with the argument widths selected at compile time.
extern tcuMemcpyHtoD                   *cuMemcpyHtoD;
extern tcuMemcpyDtoH                   *cuMemcpyDtoH;
extern tcuMemcpyDtoD                   *cuMemcpyDtoD;
extern tcuMemcpyDtoA                   *cuMemcpyDtoA;
extern tcuMemcpyAtoD                   *cuMemcpyAtoD;
extern tcuMemcpyHtoA                   *cuMemcpyHtoA;
extern tcuMemcpyAtoH                   *cuMemcpyAtoH;
extern tcuMemcpyAtoA                   *cuMemcpyAtoA;
extern tcuMemcpy2D                     *cuMemcpy2D;
extern tcuMemcpy2DUnaligned            *cuMemcpy2DUnaligned;
extern tcuMemcpy3D                     *cuMemcpy3D;
extern tcuMemcpyHtoDAsync              *cuMemcpyHtoDAsync;
extern tcuMemcpyDtoHAsync              *cuMemcpyDtoHAsync;
extern tcuMemcpyDtoDAsync              *cuMemcpyDtoDAsync;
extern tcuMemcpyHtoAAsync              *cuMemcpyHtoAAsync;
extern tcuMemcpyAtoHAsync              *cuMemcpyAtoHAsync;
extern tcuMemcpy2DAsync                *cuMemcpy2DAsync;
extern tcuMemcpy3DAsync                *cuMemcpy3DAsync;
extern tcuMemsetD8                     *cuMemsetD8;
extern tcuMemsetD16                    *cuMemsetD16;
extern tcuMemsetD32                    *cuMemsetD32;
extern tcuMemsetD2D8                   *cuMemsetD2D8;
extern tcuMemsetD2D16                  *cuMemsetD2D16;
extern tcuMemsetD2D32                  *cuMemsetD2D32;
extern tcuArrayCreate                  *cuArrayCreate;
extern tcuArrayGetDescriptor           *cuArrayGetDescriptor;
extern tcuArray3DCreate                *cuArray3DCreate;
extern tcuArray3DGetDescriptor         *cuArray3DGetDescriptor;
extern tcuTexRefSetAddress             *cuTexRefSetAddress;
extern tcuTexRefSetAddress2D           *cuTexRefSetAddress2D;
extern tcuTexRefGetAddress             *cuTexRefGetAddress;
extern tcuGraphicsResourceGetMappedPointer   *cuGraphicsResourceGetMappedPointer;
1686 
1687 /************************************/
1688 CUresult CUDAAPI cuInit   (unsigned int, int cudaVersion);
1689 void CUDAAPI cuUninit();
1690 /************************************/
1691 
1692 
/* Fallback default for the targeted driver API version. The same guarded
 * default appears near the top of this header, so this one only takes effect
 * if the macro was undefined in between. */
#ifndef __CUDA_API_VERSION
#define __CUDA_API_VERSION 4000
#endif
1696 
1697 
1698 #include <gpac/setup.h>
1699 #include "../../src/compositor/gl_inc.h"
1700 
1701 /**
1702  * \file dynlink_cudaGL.h
1703  * \brief Header file for the OpenGL interoperability functions of the
1704  * low-level CUDA driver application programming interface.
1705  */
1706 
1707 /**
1708  * \defgroup CUDA_GL OpenGL Interoperability
1709  * \ingroup CUDA_DRIVER
1710  *
1711  * ___MANBRIEF___ OpenGL interoperability functions of the low-level CUDA
1712  * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
1713  *
1714  * This section describes the OpenGL interoperability functions of the
1715  * low-level CUDA driver application programming interface. Note that mapping
1716  * of OpenGL resources is performed with the graphics API agnostic, resource
1717  * mapping interface described in \ref CUDA_GRAPHICS "Graphics Interoperability".
1718  *
1719  * @{
1720  */
1721 
/* On Windows, provide HGPUNV as an opaque pointer unless the
 * WGL_NV_gpu_affinity extension macro is already defined. */
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#if !defined(WGL_NV_gpu_affinity)
typedef void* HGPUNV;
#endif
#endif /* _WIN32 */

/* Signature types for registering an OpenGL buffer or image (identified by a
 * GLuint name) as a CUgraphicsResource. */
typedef CUresult CUDAAPI tcuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
typedef CUresult CUDAAPI tcuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);

/* Windows only: signature type taking an HGPUNV handle and writing a CUdevice. */
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
typedef CUresult CUDAAPI tcuWGLGetDevice(CUdevice *pDevice, HGPUNV hGpu);
#endif /* _WIN32 */
1734 
1735 /**
1736  * CUDA devices corresponding to an OpenGL device
1737  */
typedef enum {
    /** The CUDA devices for all GPUs used by the current OpenGL context */
    CU_GL_DEVICE_LIST_ALL = 1,
    /** The CUDA devices for the GPUs used by the current OpenGL context in its currently rendering frame */
    CU_GL_DEVICE_LIST_CURRENT_FRAME = 2,
    /** The CUDA devices for the GPUs to be used by the current OpenGL context in the next frame */
    CU_GL_DEVICE_LIST_NEXT_FRAME = 3
} CUGLDeviceList;
1743 
/* Signature type for cuGLGetDevices, declared here only when targeting API
 * version 6050 or newer. A second declaration of this type appears later in
 * this header, in the deprecated OpenGL interop section. */
#if __CUDA_API_VERSION >= 6050
typedef CUresult CUDAAPI tcuGLGetDevices(unsigned int *pCudaDeviceCount, CUdevice *pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
#endif /* __CUDA_API_VERSION >= 6050 */
1747 
1748 /**
1749  * \defgroup CUDA_GL_DEPRECATED OpenGL Interoperability [DEPRECATED]
1750  *
1751  * ___MANBRIEF___ deprecated OpenGL interoperability functions of the low-level
1752  * CUDA driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
1753  *
1754  * This section describes deprecated OpenGL interoperability functionality.
1755  *
1756  * @{
1757  */
1758 
1759 /** Flags to map or unmap a resource */
typedef enum CUGLmap_flags_enum {
    CU_GL_MAP_RESOURCE_FLAGS_NONE          = 0,  /**< no flag set */
    CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY     = 1,  /**< value 0x01 */
    CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 2   /**< value 0x02 */
} CUGLmap_flags;
1765 
/* Signature types for the deprecated OpenGL interop entry points.
 * The version guard around tcuGLCtxCreate is commented out, so that type is
 * declared for every API version. */
//#if __CUDA_API_VERSION >= 3020
typedef CUresult CUDAAPI tcuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device);
//#endif /* __CUDA_API_VERSION >= 3020 */

typedef CUresult CUDAAPI tcuGLInit(void);
typedef CUresult CUDAAPI tcuGLRegisterBufferObject(GLuint buffer);

/* Only declared for API version 3020 and newer; note that the extern
 * pointers using this type further down are declared unconditionally. */
#if __CUDA_API_VERSION >= 3020
typedef CUresult CUDAAPI tcuGLMapBufferObject(CUdeviceptr *dptr, size_t *size, GLuint buffer);
#endif /* __CUDA_API_VERSION >= 3020 */

typedef CUresult CUDAAPI tcuGLUnmapBufferObject(GLuint buffer);
typedef CUresult CUDAAPI tcuGLUnregisterBufferObject(GLuint buffer);
typedef CUresult CUDAAPI tcuGLSetBufferObjectMapFlags(GLuint buffer, unsigned int Flags);

/* Stream-taking variant of tcuGLMapBufferObject; same version guard. */
#if __CUDA_API_VERSION >= 3020
typedef CUresult CUDAAPI tcuGLMapBufferObjectAsync(CUdeviceptr *dptr, size_t *size, GLuint buffer, CUstream hStream);
#endif /* __CUDA_API_VERSION >= 3020 */
1784 
1785 typedef CUresult CUDAAPI tcuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
1786 typedef CUresult CUDAAPI tcuGLGetDevices(unsigned int *pCudaDeviceCount, CUdevice *pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
1787 
/* Function pointers for the OpenGL interop entry points. Only declared here
 * (extern); the definitions live in another translation unit. */
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
extern tcuWGLGetDevice                 *cuWGLGetDevice;
#endif

/* The plain and _v2 pointers share one signature type each. */
extern tcuGLCtxCreate                  *cuGLCtxCreate;
extern tcuGLCtxCreate                  *cuGLCtxCreate_v2;
extern tcuGLMapBufferObject            *cuGLMapBufferObject;
extern tcuGLMapBufferObject            *cuGLMapBufferObject_v2;
extern tcuGLMapBufferObjectAsync       *cuGLMapBufferObjectAsync;

/* Conditional declaration of cuGLGetDevices; the same pointer is declared
 * again, unconditionally, at the end of this list. */
#if __CUDA_API_VERSION >= 6050
extern tcuGLGetDevices                 *cuGLGetDevices;
#endif

extern tcuGraphicsGLRegisterBuffer     *cuGraphicsGLRegisterBuffer;
extern tcuGraphicsGLRegisterImage      *cuGraphicsGLRegisterImage;
extern tcuGLSetBufferObjectMapFlags    *cuGLSetBufferObjectMapFlags;
extern tcuGLRegisterBufferObject       *cuGLRegisterBufferObject;

extern tcuGLUnmapBufferObject          *cuGLUnmapBufferObject;
extern tcuGLUnmapBufferObjectAsync     *cuGLUnmapBufferObjectAsync;

extern tcuGLUnregisterBufferObject     *cuGLUnregisterBufferObject;
/* NOTE(review): declared for every API version despite the trailing remark;
 * presumably the pointer is only usable with a 6.5+ driver — confirm. */
extern tcuGLGetDevices                 *cuGLGetDevices; // CUDA 6.5 only
1812 
1813 
/* Handle type for the driver library: HMODULE (from <Windows.h>) on Windows
 * builds, a plain void pointer on every other platform. */
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#include <Windows.h>
typedef HMODULE CUDADRIVER;
#else
typedef void *CUDADRIVER;
#endif
1820 
1821 
1822 
1823 
/*
 * Define __CUVID_DEVPTR64 on x86-64 builds when CUDA_VERSION is at least 3020
 * and CUDA_FORCE_API_VERSION, if defined, is at least 3020 as well.
 * NOTE(review): the CUdeviceptr typedef near the top of this header also
 * treats __aarch64__ as a 64-bit target, but this test does not — confirm
 * whether aarch64 builds should define __CUVID_DEVPTR64 too.
 */
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
#if (CUDA_VERSION >= 3020) && (!defined(CUDA_FORCE_API_VERSION) || (CUDA_FORCE_API_VERSION >= 3020))
#define __CUVID_DEVPTR64
#endif
#endif
1829 
1830 
/* Opaque handle types: a decoder handle (untyped pointer) and a context-lock
 * handle (pointer to a struct that is never defined in this view). */
typedef void *CUvideodecoder;
typedef struct _CUcontextlock_st *CUvideoctxlock;
1833 
1834 /**
1835  * \addtogroup VIDEO_DECODER Video Decoder
1836  * @{
1837  */
1838 
1839 /*!
1840  * \enum cudaVideoCodec
1841  * Video Codec Enums
1842  */
typedef enum cudaVideoCodec_enum {
    /* Compressed bitstream formats, numbered consecutively from zero. */
    cudaVideoCodec_MPEG1     = 0,   /**< MPEG1 */
    cudaVideoCodec_MPEG2     = 1,   /**< MPEG2 */
    cudaVideoCodec_MPEG4     = 2,   /**< MPEG4 */
    cudaVideoCodec_VC1       = 3,   /**< VC1 */
    cudaVideoCodec_H264      = 4,   /**< H264 */
    cudaVideoCodec_JPEG      = 5,   /**< JPEG */
    cudaVideoCodec_H264_SVC  = 6,   /**< H264-SVC */
    cudaVideoCodec_H264_MVC  = 7,   /**< H264-MVC */
    cudaVideoCodec_HEVC      = 8,   /**< HEVC */
    cudaVideoCodec_VP8       = 9,   /**< VP8 */
    cudaVideoCodec_VP9       = 10,  /**< VP9 */
    cudaVideoCodec_NumCodecs = 11,  /**< Max codecs (number of entries above) */

    /* Uncompressed YUV layouts: four characters packed into one value,
     * first character in the most significant byte. */
    cudaVideoCodec_YUV420 = ('I' << 24) | ('Y' << 16) | ('U' << 8) | 'V',   /**< Y,U,V (4:2:0) */
    cudaVideoCodec_YV12   = ('Y' << 24) | ('V' << 16) | ('1' << 8) | '2',   /**< Y,V,U (4:2:0) */
    cudaVideoCodec_NV12   = ('N' << 24) | ('V' << 16) | ('1' << 8) | '2',   /**< Y,UV  (4:2:0) */
    cudaVideoCodec_YUYV   = ('Y' << 24) | ('U' << 16) | ('Y' << 8) | 'V',   /**< YUYV/YUY2 (4:2:2) */
    cudaVideoCodec_UYVY   = ('U' << 24) | ('Y' << 16) | ('V' << 8) | 'Y'    /**< UYVY (4:2:2) */
} cudaVideoCodec;
1863 
1864 /*!
1865  * \enum cudaVideoSurfaceFormat
1866  * Video Surface Formats Enums
1867  */
typedef enum cudaVideoSurfaceFormat_enum {
    /** NV12 output surface */
    cudaVideoSurfaceFormat_NV12 = 0,
    /** P016 output surface. NOTE(review): presumably the 16-bit variant used for content deeper than 8 bits — confirm */
    cudaVideoSurfaceFormat_P016 = 1
} cudaVideoSurfaceFormat;
1872 
1873 /*!
1874  * \enum cudaVideoDeinterlaceMode
1875  * Deinterlacing Modes Enums
1876  */
typedef enum cudaVideoDeinterlaceMode_enum {
    /** Weave both fields (no deinterlacing) */
    cudaVideoDeinterlaceMode_Weave = 0,
    /** Drop one field */
    cudaVideoDeinterlaceMode_Bob = 1,
    /** Adaptive deinterlacing */
    cudaVideoDeinterlaceMode_Adaptive = 2
} cudaVideoDeinterlaceMode;
1882 
1883 /*!
1884  * \enum cudaVideoChromaFormat
1885  * Chroma Formats Enums
1886  */
typedef enum cudaVideoChromaFormat_enum {
    /** Monochrome */
    cudaVideoChromaFormat_Monochrome = 0,
    /** 4:2:0 */
    cudaVideoChromaFormat_420 = 1,
    /** 4:2:2 */
    cudaVideoChromaFormat_422 = 2,
    /** 4:4:4 */
    cudaVideoChromaFormat_444 = 3
} cudaVideoChromaFormat;
1893 
1894 /*!
1895  * \enum cudaVideoCreateFlags
1896  * Decoder Flags Enums
1897  */
typedef enum cudaVideoCreateFlags_enum {
    /** Default operation mode: use dedicated video engines */
    cudaVideoCreate_Default = 0,
    /** Use a CUDA-based decoder if faster than dedicated engines (requires a valid vidLock object for multi-threading) */
    cudaVideoCreate_PreferCUDA = 1 << 0,
    /** Go through DXVA internally if possible (requires D3D9 interop) */
    cudaVideoCreate_PreferDXVA = 1 << 1,
    /** Use dedicated video engines directly */
    cudaVideoCreate_PreferCUVID = 1 << 2
} cudaVideoCreateFlags;
1904 
1905 /*!
1906  * \struct CUVIDDECODECREATEINFO
1907  * Struct used in create decoder
1908  */
/* Field order and types define the binary layout handed to the decoder
 * library; do not reorder or resize members. */
typedef struct _CUVIDDECODECREATEINFO
{
    unsigned long ulWidth;              /**< Coded Sequence Width */
    unsigned long ulHeight;             /**< Coded Sequence Height */
    unsigned long ulNumDecodeSurfaces;  /**< Maximum number of internal decode surfaces */
    cudaVideoCodec CodecType;           /**< cudaVideoCodec_XXX */
    cudaVideoChromaFormat ChromaFormat; /**< cudaVideoChromaFormat_XXX (only 4:2:0 is currently supported) */
    unsigned long ulCreationFlags;      /**< Decoder creation flags (cudaVideoCreateFlags_XXX) */
    unsigned long bitDepthMinus8;       /**< NOTE(review): presumably the sample bit depth minus 8 (0 for 8-bit content) - confirm */
    unsigned long Reserved1[4];         /**< Reserved for future use - set to zero */
    /**
    * area of the frame that should be displayed
    */
    struct {
        short left;
        short top;
        short right;
        short bottom;
    } display_area;

    cudaVideoSurfaceFormat OutputFormat;       /**< cudaVideoSurfaceFormat_XXX */
    cudaVideoDeinterlaceMode DeinterlaceMode;  /**< cudaVideoDeinterlaceMode_XXX */
    unsigned long ulTargetWidth;               /**< Post-processed Output Width (Should be aligned to 2) */
    unsigned long ulTargetHeight;              /**< Post-processed Output Height (Should be aligned to 2) */
    unsigned long ulNumOutputSurfaces;         /**< Maximum number of output surfaces simultaneously mapped */
    CUvideoctxlock vidLock;                    /**< If non-NULL, context lock used for synchronizing ownership of the cuda context */
    /**
    * target rectangle in the output frame (for aspect ratio conversion)
    * if a null rectangle is specified, {0,0,ulTargetWidth,ulTargetHeight} will be used
    */
    struct {
        short left;
        short top;
        short right;
        short bottom;
    } target_rect;
    unsigned long Reserved2[5];                /**< Reserved for future use - set to zero */
} CUVIDDECODECREATEINFO;
1947 
1948 /*!
1949  * \struct CUVIDH264DPBENTRY
1950  * H.264 DPB Entry
1951  */
typedef struct _CUVIDH264DPBENTRY
{
    /** picture index of the reference frame */
    int PicIdx;
    /** frame_num for a short-term reference, LongTermFrameIdx for a long-term one */
    int FrameIdx;
    /** 0 = short-term reference, 1 = long-term reference */
    int is_long_term;
    /** non-existing reference frame (the corresponding PicIdx should be set to -1) */
    int not_existing;
    /** 0 = unused, 1 = top_field, 2 = bottom_field, 3 = both_fields */
    int used_for_reference;
    /** field order count of the top and bottom fields */
    int FieldOrderCnt[2];
} CUVIDH264DPBENTRY;
1961 
1962 /*!
1963  * \struct CUVIDH264MVCEXT
1964  * H.264 MVC Picture Parameters Ext
1965  */
typedef struct _CUVIDH264MVCEXT
{
    /* Two view-related integers. */
    int num_views_minus1,
        view_id;
    /* Four byte-sized members; the last one is a reserved byte. */
    unsigned char inter_view_flag,
                  num_inter_view_refs_l0,
                  num_inter_view_refs_l1,
                  MVCReserved8Bits;
    /* Inter-view reference tables for list 0 and list 1, 16 entries each. */
    int InterViewRefsL0[16],
        InterViewRefsL1[16];
} CUVIDH264MVCEXT;
1977 
1978 /*!
1979  * \struct CUVIDH264SVCEXT
1980  * H.264 SVC Picture Parameters Ext
1981  */
/* Field order and types define the binary layout handed to the decoder
 * library; do not reorder or resize members. */
typedef struct _CUVIDH264SVCEXT
{
    unsigned char profile_idc;
    unsigned char level_idc;
    unsigned char DQId;
    unsigned char DQIdMax;
    unsigned char disable_inter_layer_deblocking_filter_idc;
    unsigned char ref_layer_chroma_phase_y_plus1;
    signed char   inter_layer_slice_alpha_c0_offset_div2;
    signed char   inter_layer_slice_beta_offset_div2;

    unsigned short DPBEntryValidFlag;
    unsigned char inter_layer_deblocking_filter_control_present_flag;
    unsigned char extended_spatial_scalability_idc;
    unsigned char adaptive_tcoeff_level_prediction_flag;
    unsigned char slice_header_restriction_flag;
    unsigned char chroma_phase_x_plus1_flag;
    unsigned char chroma_phase_y_plus1;

    unsigned char tcoeff_level_prediction_flag;
    unsigned char constrained_intra_resampling_flag;
    unsigned char ref_layer_chroma_phase_x_plus1_flag;
    unsigned char store_ref_base_pic_flag;
    unsigned char Reserved8BitsA;
    unsigned char Reserved8BitsB;
    // For the 4 scaled_ref_layer_XX fields below,
    // if (extended_spatial_scalability_idc == 1), SPS field, G.7.3.2.1.4, add prefix "seq_"
    // if (extended_spatial_scalability_idc == 2), SLH field, G.7.3.3.4,
    short scaled_ref_layer_left_offset;
    short scaled_ref_layer_top_offset;
    short scaled_ref_layer_right_offset;
    short scaled_ref_layer_bottom_offset;
    unsigned short Reserved16Bits;
    // Only a pointer is stored here, so struct _CUVIDPICPARAMS does not need to be a complete type at this point.
    struct _CUVIDPICPARAMS *pNextLayer; /**< Points to the picparams for the next layer to be decoded. Linked list ends at the target layer. */
    int bRefBaseLayer;                  /**< whether to store ref base pic */
} CUVIDH264SVCEXT;
2018 
2019 /*!
2020  * \struct CUVIDH264PICPARAMS
2021  * H.264 Picture Parameters
2022  */
/* Field order and types define the binary layout handed to the decoder
 * library; do not reorder or resize members. */
typedef struct _CUVIDH264PICPARAMS
{
    // SPS
    int log2_max_frame_num_minus4;
    int pic_order_cnt_type;
    int log2_max_pic_order_cnt_lsb_minus4;
    int delta_pic_order_always_zero_flag;
    int frame_mbs_only_flag;
    int direct_8x8_inference_flag;
    int num_ref_frames;             // NOTE: shall meet level 4.1 restrictions
    unsigned char residual_colour_transform_flag;
    unsigned char bit_depth_luma_minus8;    // Must be 0 (only 8-bit supported)
    unsigned char bit_depth_chroma_minus8;  // Must be 0 (only 8-bit supported)
    unsigned char qpprime_y_zero_transform_bypass_flag;
    // PPS
    int entropy_coding_mode_flag;
    int pic_order_present_flag;
    int num_ref_idx_l0_active_minus1;
    int num_ref_idx_l1_active_minus1;
    int weighted_pred_flag;
    int weighted_bipred_idc;
    int pic_init_qp_minus26;
    int deblocking_filter_control_present_flag;
    int redundant_pic_cnt_present_flag;
    int transform_8x8_mode_flag;
    int MbaffFrameFlag;
    int constrained_intra_pred_flag;
    int chroma_qp_index_offset;
    int second_chroma_qp_index_offset;
    int ref_pic_flag;
    int frame_num;
    int CurrFieldOrderCnt[2];
    // DPB
    CUVIDH264DPBENTRY dpb[16];          // List of reference frames within the DPB
    // Quantization Matrices (raster-order)
    unsigned char WeightScale4x4[6][16];
    unsigned char WeightScale8x8[2][64];
    // FMO/ASO
    unsigned char fmo_aso_enable;
    unsigned char num_slice_groups_minus1;
    unsigned char slice_group_map_type;
    signed char pic_init_qs_minus26;
    unsigned int slice_group_change_rate_minus1;
    // The map is reachable either as a 64-bit integer address or as a byte pointer;
    // both members share the same storage.
    union
    {
        unsigned long long slice_group_map_addr;
        const unsigned char *pMb2SliceGroupMap;
    } fmo;
    unsigned int  Reserved[12];
    // SVC/MVC
    // Anonymous union: mvcext and svcext overlap and are accessed directly as
    // members of this struct (needs C11 or a compiler extension).
    union
    {
        CUVIDH264MVCEXT mvcext;
        CUVIDH264SVCEXT svcext;
    };
} CUVIDH264PICPARAMS;
2079 
2080 
2081 /*!
2082  * \struct CUVIDMPEG2PICPARAMS
2083  * MPEG-2 Picture Parameters
2084  */
typedef struct _CUVIDMPEG2PICPARAMS
{
    /* Picture indices of the forward (P/B-frames) and backward (B-frames) references. */
    int ForwardRefIdx,
        BackwardRefIdx;
    int picture_coding_type;
    int full_pel_forward_vector,
        full_pel_backward_vector;
    int f_code[2][2];
    int intra_dc_precision,
        frame_pred_frame_dct,
        concealment_motion_vectors,
        q_scale_type,
        intra_vlc_format,
        alternate_scan,
        top_field_first;
    /* Quantization matrices, stored in raster order. */
    unsigned char QuantMatrixIntra[64],
                  QuantMatrixInter[64];
} CUVIDMPEG2PICPARAMS;
2104 
2105 ////////////////////////////////////////////////////////////////////////////////////////////////
2106 //
2107 // MPEG-4 Picture Parameters
2108 //
2109 
// MPEG-4 has VOP types instead of Picture types
// (presumably the values carried in CUVIDMPEG4PICPARAMS::vop_coding_type -- TODO confirm)
// NOTE(review): these macro names carry no CUVID/cuda prefix, so they can
// clash with identically named macros from other headers.
#define I_VOP 0
#define P_VOP 1
#define B_VOP 2
#define S_VOP 3
2115 
2116 /*!
2117  * \struct CUVIDMPEG4PICPARAMS
2118  * MPEG-4 Picture Parameters
2119  */
2120 typedef struct _CUVIDMPEG4PICPARAMS
2121 {
2122     int ForwardRefIdx;          // Picture index of forward reference (P/B-frames)
2123     int BackwardRefIdx;         // Picture index of backward reference (B-frames)
2124     // VOL
2125     int video_object_layer_width;
2126     int video_object_layer_height;
2127     int vop_time_increment_bitcount;
2128     int top_field_first;
2129     int resync_marker_disable;
2130     int quant_type;
2131     int quarter_sample;
2132     int short_video_header;
2133     int divx_flags;
2134     // VOP
2135     int vop_coding_type;
2136     int vop_coded;
2137     int vop_rounding_type;
2138     int alternate_vertical_scan_flag;
2139     int interlaced;
2140     int vop_fcode_forward;
2141     int vop_fcode_backward;
2142     int trd[2];
2143     int trb[2];
2144     // Quantization matrices (raster order)
2145     unsigned char QuantMatrixIntra[64];
2146     unsigned char QuantMatrixInter[64];
2147     int gmc_enabled;
2148 } CUVIDMPEG4PICPARAMS;
2149 
2150 /*!
2151  * \struct CUVIDVC1PICPARAMS
2152  * VC1 Picture Parameters
2153  */
2154 typedef struct _CUVIDVC1PICPARAMS
2155 {
2156     int ForwardRefIdx;      /**< Picture index of forward reference (P/B-frames) */
2157     int BackwardRefIdx;     /**< Picture index of backward reference (B-frames) */
2158     int FrameWidth;         /**< Actual frame width */
2159     int FrameHeight;        /**< Actual frame height */
2160     // PICTURE
2161     int intra_pic_flag;     /**< Set to 1 for I,BI frames */
2162     int ref_pic_flag;       /**< Set to 1 for I,P frames */
2163     int progressive_fcm;    /**< Progressive frame */
2164     // SEQUENCE
2165     int profile;
2166     int postprocflag;
2167     int pulldown;
2168     int interlace;
2169     int tfcntrflag;
2170     int finterpflag;
2171     int psf;
2172     int multires;
2173     int syncmarker;
2174     int rangered;
2175     int maxbframes;
2176     // ENTRYPOINT
2177     int panscan_flag;
2178     int refdist_flag;
2179     int extended_mv;
2180     int dquant;
2181     int vstransform;
2182     int loopfilter;
2183     int fastuvmc;
2184     int overlap;
2185     int quantizer;
2186     int extended_dmv;
2187     int range_mapy_flag;
2188     int range_mapy;
2189     int range_mapuv_flag;
2190     int range_mapuv;
2191     int rangeredfrm;    // range reduction state
2192 } CUVIDVC1PICPARAMS;
2193 
2194 /*!
2195  * \struct CUVIDJPEGPICPARAMS
2196  * JPEG Picture Parameters
2197  */
2198 typedef struct _CUVIDJPEGPICPARAMS
2199 {
2200     int Reserved;
2201 } CUVIDJPEGPICPARAMS;
2202 
2203 
2204  /*!
2205  * \struct CUVIDHEVCPICPARAMS
2206  * HEVC Picture Parameters
2207  */
2208 typedef struct _CUVIDHEVCPICPARAMS
2209 {
2210     // sps
2211     int pic_width_in_luma_samples;
2212     int pic_height_in_luma_samples;
2213     unsigned char log2_min_luma_coding_block_size_minus3;
2214     unsigned char log2_diff_max_min_luma_coding_block_size;
2215     unsigned char log2_min_transform_block_size_minus2;
2216     unsigned char log2_diff_max_min_transform_block_size;
2217     unsigned char pcm_enabled_flag;
2218     unsigned char log2_min_pcm_luma_coding_block_size_minus3;
2219     unsigned char log2_diff_max_min_pcm_luma_coding_block_size;
2220     unsigned char pcm_sample_bit_depth_luma_minus1;
2221 
2222     unsigned char pcm_sample_bit_depth_chroma_minus1;
2223     unsigned char pcm_loop_filter_disabled_flag;
2224     unsigned char strong_intra_smoothing_enabled_flag;
2225     unsigned char max_transform_hierarchy_depth_intra;
2226     unsigned char max_transform_hierarchy_depth_inter;
2227     unsigned char amp_enabled_flag;
2228     unsigned char separate_colour_plane_flag;
2229     unsigned char log2_max_pic_order_cnt_lsb_minus4;
2230 
2231     unsigned char num_short_term_ref_pic_sets;
2232     unsigned char long_term_ref_pics_present_flag;
2233     unsigned char num_long_term_ref_pics_sps;
2234     unsigned char sps_temporal_mvp_enabled_flag;
2235     unsigned char sample_adaptive_offset_enabled_flag;
2236     unsigned char scaling_list_enable_flag;
2237     unsigned char IrapPicFlag;
2238     unsigned char IdrPicFlag;
2239 
2240     unsigned char bit_depth_luma_minus8;
2241     unsigned char bit_depth_chroma_minus8;
2242     unsigned char reserved1[14];
2243 
2244     // pps
2245     unsigned char dependent_slice_segments_enabled_flag;
2246     unsigned char slice_segment_header_extension_present_flag;
2247     unsigned char sign_data_hiding_enabled_flag;
2248     unsigned char cu_qp_delta_enabled_flag;
2249     unsigned char diff_cu_qp_delta_depth;
2250     signed char init_qp_minus26;
2251     signed char pps_cb_qp_offset;
2252     signed char pps_cr_qp_offset;
2253 
2254     unsigned char constrained_intra_pred_flag;
2255     unsigned char weighted_pred_flag;
2256     unsigned char weighted_bipred_flag;
2257     unsigned char transform_skip_enabled_flag;
2258     unsigned char transquant_bypass_enabled_flag;
2259     unsigned char entropy_coding_sync_enabled_flag;
2260     unsigned char log2_parallel_merge_level_minus2;
2261     unsigned char num_extra_slice_header_bits;
2262 
2263     unsigned char loop_filter_across_tiles_enabled_flag;
2264     unsigned char loop_filter_across_slices_enabled_flag;
2265     unsigned char output_flag_present_flag;
2266     unsigned char num_ref_idx_l0_default_active_minus1;
2267     unsigned char num_ref_idx_l1_default_active_minus1;
2268     unsigned char lists_modification_present_flag;
2269     unsigned char cabac_init_present_flag;
2270     unsigned char pps_slice_chroma_qp_offsets_present_flag;
2271 
2272     unsigned char deblocking_filter_override_enabled_flag;
2273     unsigned char pps_deblocking_filter_disabled_flag;
2274     signed char pps_beta_offset_div2;
2275     signed char pps_tc_offset_div2;
2276     unsigned char tiles_enabled_flag;
2277     unsigned char uniform_spacing_flag;
2278     unsigned char num_tile_columns_minus1;
2279     unsigned char num_tile_rows_minus1;
2280 
2281     unsigned short column_width_minus1[21];
2282     unsigned short row_height_minus1[21];
2283     unsigned int reserved3[15];
2284 
2285     // RefPicSets
2286     int NumBitsForShortTermRPSInSlice;
2287     int NumDeltaPocsOfRefRpsIdx;
2288     int NumPocTotalCurr;
2289     int NumPocStCurrBefore;
2290     int NumPocStCurrAfter;
2291     int NumPocLtCurr;
2292     int CurrPicOrderCntVal;
2293     int RefPicIdx[16];                  // [refpic] Indices of valid reference pictures (-1 if unused for reference)
2294     int PicOrderCntVal[16];             // [refpic]
2295     unsigned char IsLongTerm[16];       // [refpic] 0=not a long-term reference, 1=long-term reference
2296     unsigned char RefPicSetStCurrBefore[8]; // [0..NumPocStCurrBefore-1] -> refpic (0..15)
2297     unsigned char RefPicSetStCurrAfter[8];  // [0..NumPocStCurrAfter-1] -> refpic (0..15)
2298     unsigned char RefPicSetLtCurr[8];       // [0..NumPocLtCurr-1] -> refpic (0..15)
2299     unsigned char RefPicSetInterLayer0[8];
2300     unsigned char RefPicSetInterLayer1[8];
2301     unsigned int reserved4[12];
2302 
2303     // scaling lists (diag order)
2304     unsigned char ScalingList4x4[6][16];       // [matrixId][i]
2305     unsigned char ScalingList8x8[6][64];       // [matrixId][i]
2306     unsigned char ScalingList16x16[6][64];     // [matrixId][i]
2307     unsigned char ScalingList32x32[2][64];     // [matrixId][i]
2308     unsigned char ScalingListDCCoeff16x16[6];  // [matrixId]
2309     unsigned char ScalingListDCCoeff32x32[2];  // [matrixId]
2310 } CUVIDHEVCPICPARAMS;
2311 
2312 
2313 /*!
2314  * \struct CUVIDVP8PICPARAMS
2315  * VP8 Picture Parameters
2316  */
2317 typedef struct _CUVIDVP8PICPARAMS
2318 {
2319     int width;
2320     int height;
2321     unsigned int first_partition_size;
2322     //Frame Indexes
2323     unsigned char LastRefIdx;
2324     unsigned char GoldenRefIdx;
2325     unsigned char AltRefIdx;
2326     union {
2327         struct {
2328             unsigned char frame_type : 1;    /**< 0 = KEYFRAME, 1 = INTERFRAME  */
2329             unsigned char version : 3;
2330             unsigned char show_frame : 1;
2331             unsigned char update_mb_segmentation_data : 1;    /**< Must be 0 if segmentation is not enabled */
2332             unsigned char Reserved2Bits : 2;
2333         };
2334         unsigned char wFrameTagFlags;
2335     };
2336     unsigned char Reserved1[4];
2337     unsigned int  Reserved2[3];
2338 } CUVIDVP8PICPARAMS;
2339 
2340 /*!
2341  * \struct CUVIDVP9PICPARAMS
2342  * VP9 Picture Parameters
2343  */
2344 typedef struct _CUVIDVP9PICPARAMS
2345 {
2346     unsigned int width;
2347     unsigned int height;
2348 
2349     //Frame Indices
2350     unsigned char LastRefIdx;
2351     unsigned char GoldenRefIdx;
2352     unsigned char AltRefIdx;
2353     unsigned char colorSpace;
2354 
2355     unsigned short profile : 3;
2356     unsigned short frameContextIdx : 2;
2357     unsigned short frameType : 1;
2358     unsigned short showFrame : 1;
2359     unsigned short errorResilient : 1;
2360     unsigned short frameParallelDecoding : 1;
2361     unsigned short subSamplingX : 1;
2362     unsigned short subSamplingY : 1;
2363     unsigned short intraOnly : 1;
2364     unsigned short allow_high_precision_mv : 1;
2365     unsigned short refreshEntropyProbs : 1;
2366     unsigned short reserved2Bits : 2;
2367 
2368     unsigned short reserved16Bits;
2369 
2370     unsigned char  refFrameSignBias[4];
2371 
2372     unsigned char bitDepthMinus8Luma;
2373     unsigned char bitDepthMinus8Chroma;
2374     unsigned char loopFilterLevel;
2375     unsigned char loopFilterSharpness;
2376 
2377     unsigned char modeRefLfEnabled;
2378     unsigned char log2_tile_columns;
2379     unsigned char log2_tile_rows;
2380 
2381     unsigned char segmentEnabled : 1;
2382     unsigned char segmentMapUpdate : 1;
2383     unsigned char segmentMapTemporalUpdate : 1;
2384     unsigned char segmentFeatureMode : 1;
2385     unsigned char reserved4Bits : 4;
2386 
2387 
2388     unsigned char segmentFeatureEnable[8][4];
2389     short segmentFeatureData[8][4];
2390     unsigned char mb_segment_tree_probs[7];
2391     unsigned char segment_pred_probs[3];
2392     unsigned char reservedSegment16Bits[2];
2393 
2394     int qpYAc;
2395     int qpYDc;
2396     int qpChDc;
2397     int qpChAc;
2398 
2399     unsigned int activeRefIdx[3];
2400     unsigned int resetFrameContext;
2401     unsigned int mcomp_filter_type;
2402     unsigned int mbRefLfDelta[4];
2403     unsigned int mbModeLfDelta[2];
2404     unsigned int frameTagSize;
2405     unsigned int offsetToDctParts;
2406     unsigned int reserved128Bits[4];
2407 
2408 } CUVIDVP9PICPARAMS;
2409 
2410 
2411 /*!
2412  * \struct CUVIDPICPARAMS
2413  * Picture Parameters for Decoding
2414  */
2415 typedef struct _CUVIDPICPARAMS
2416 {
2417     int PicWidthInMbs;                    /**< Coded Frame Size */
2418     int FrameHeightInMbs;                 /**< Coded Frame Height */
2419     int CurrPicIdx;                       /**< Output index of the current picture */
2420     int field_pic_flag;                   /**< 0=frame picture, 1=field picture */
2421     int bottom_field_flag;                /**< 0=top field, 1=bottom field (ignored if field_pic_flag=0) */
2422     int second_field;                     /**< Second field of a complementary field pair */
2423     // Bitstream data
2424     unsigned int nBitstreamDataLen;        /**< Number of bytes in bitstream data buffer */
2425     const unsigned char *pBitstreamData;   /**< Ptr to bitstream data for this picture (slice-layer) */
2426     unsigned int nNumSlices;               /**< Number of slices in this picture */
2427     const unsigned int *pSliceDataOffsets; /**< nNumSlices entries, contains offset of each slice within the bitstream data buffer */
2428     int ref_pic_flag;                      /**< This picture is a reference picture */
2429     int intra_pic_flag;                    /**< This picture is entirely intra coded */
2430     unsigned int Reserved[30];             /**< Reserved for future use */
2431     // Codec-specific data
2432     union {
2433         CUVIDMPEG2PICPARAMS mpeg2;         /**< Also used for MPEG-1 */
2434         CUVIDH264PICPARAMS h264;
2435         CUVIDVC1PICPARAMS vc1;
2436         CUVIDMPEG4PICPARAMS mpeg4;
2437         CUVIDJPEGPICPARAMS jpeg;
2438         CUVIDHEVCPICPARAMS hevc;
2439         CUVIDVP8PICPARAMS vp8;
2440         CUVIDVP9PICPARAMS vp9;
2441         unsigned int CodecReserved[1024];
2442     } CodecSpecific;
2443 } CUVIDPICPARAMS;
2444 
2445 
2446 /*!
2447  * \struct CUVIDPROCPARAMS
2448  * Picture Parameters for Postprocessing
2449  */
2450 typedef struct _CUVIDPROCPARAMS
2451 {
2452     int progressive_frame;  /**< Input is progressive (deinterlace_mode will be ignored)  */
2453     int second_field;       /**< Output the second field (ignored if deinterlace mode is Weave) */
2454     int top_field_first;    /**< Input frame is top field first (1st field is top, 2nd field is bottom) */
2455     int unpaired_field;     /**< Input only contains one field (2nd field is invalid) */
2456     // The fields below are used for raw YUV input
2457     unsigned int reserved_flags;        /**< Reserved for future use (set to zero) */
2458     unsigned int reserved_zero;         /**< Reserved (set to zero) */
2459     unsigned long long raw_input_dptr;  /**< Input CUdeviceptr for raw YUV extensions */
2460     unsigned int raw_input_pitch;       /**< pitch in bytes of raw YUV input (should be aligned appropriately) */
2461     unsigned int raw_input_format;      /**< Reserved for future use (set to zero) */
2462     unsigned long long raw_output_dptr; /**< Reserved for future use (set to zero) */
2463     unsigned int raw_output_pitch;      /**< Reserved for future use (set to zero) */
2464     unsigned int Reserved[48];
2465     void *Reserved3[3];
2466 } CUVIDPROCPARAMS;
2467 
2468 
2469 /**
2470  *
2471  * In order to minimize decode latencies, there should be always at least 2 pictures in the decode
2472  * queue at any time, in order to make sure that all decode engines are always busy.
2473  *
2474  * Overall data flow:
2475  *  - cuvidCreateDecoder(...)
2476  *  For each picture:
2477  *  - cuvidDecodePicture(N)
2478  *  - cuvidMapVideoFrame(N-4)
2479  *  - do some processing in cuda
2480  *  - cuvidUnmapVideoFrame(N-4)
2481  *  - cuvidDecodePicture(N+1)
2482  *  - cuvidMapVideoFrame(N-3)
2483  *    ...
2484  *  - cuvidDestroyDecoder(...)
2485  *
2486  * NOTE:
2487  * - When the cuda context is created from a D3D device, the D3D device must also be created
2488  *   with the D3DCREATE_MULTITHREADED flag.
2489  * - There is a limit to how many pictures can be mapped simultaneously (ulNumOutputSurfaces)
 * - cuvidDecodePicture may block the calling thread if there are too many pictures pending
 *   in the decode queue
2492  */
2493 
2494 /**
2495  * \fn CUresult CUDAAPI cuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci)
2496  * Create the decoder object
2497  */
2498 typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder *phDecoder, CUVIDDECODECREATEINFO *pdci);
2499 
2500 /**
2501  * \fn CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder hDecoder)
2502  * Destroy the decoder object
2503  */
2504 typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder hDecoder);
2505 
2506 /**
2507  * \fn CUresult CUDAAPI cuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams)
2508  * Decode a single picture (field or frame)
2509  */
2510 typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder hDecoder, CUVIDPICPARAMS *pPicParams);
2511 
2512 
// 32-bit device pointer variants: declared unless __CUVID_DEVPTR64 is defined
// (without __CUVID_INTERNAL), in which case the names are aliased to the
// 64-bit variants further down instead.
// NOTE(review): if __CUVID_DEVPTR64 is defined on a target that does not match
// the 64-bit guard below, tcuvidMapVideoFrame / tcuvidUnmapVideoFrame end up
// undefined although the extern declarations later in this header use them
// unconditionally.
#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL)
/**
 * \fn CUresult CUDAAPI cuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx, unsigned int *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
 * Post-process and map a video frame for use in cuda
 *
 * \param hDecoder decoder handle
 * \param nPicIdx  index of the picture to map
 * \param pDevPtr  presumably receives the device address of the mapped frame -- TODO confirm
 * \param pPitch   presumably receives the pitch of the mapped frame -- TODO confirm
 * \param pVPP     post-processing parameters
 */
typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder hDecoder, int nPicIdx,
                                           unsigned int *pDevPtr, unsigned int *pPitch,
                                           CUVIDPROCPARAMS *pVPP);

/**
 * \fn CUresult CUDAAPI cuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr)
 * Unmap a previously mapped video frame
 */
typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder hDecoder, unsigned int DevPtr);
#endif

// 64-bit device pointer variants: only declared on the 64-bit targets listed here
#if defined(WIN64) || defined(_WIN64) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
/**
 * \fn CUresult CUDAAPI cuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr, unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);
 * map a video frame
 *
 * Same parameter list as cuvidMapVideoFrame except that pDevPtr points to an
 * unsigned long long.
 */
typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder hDecoder, int nPicIdx, unsigned long long *pDevPtr,
                                             unsigned int *pPitch, CUVIDPROCPARAMS *pVPP);

/**
 * \fn CUresult CUDAAPI cuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);
 * Unmap a previously mapped video frame
 */
typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder hDecoder, unsigned long long DevPtr);

// With __CUVID_DEVPTR64 (and no __CUVID_INTERNAL) the unsuffixed type names
// resolve to the 64-bit function types.
#if defined(__CUVID_DEVPTR64) && !defined(__CUVID_INTERNAL)
#define tcuvidMapVideoFrame      tcuvidMapVideoFrame64
#define tcuvidUnmapVideoFrame    tcuvidUnmapVideoFrame64
#endif
#endif
2548 
2549 
2550 
2551 /**
2552  *
2553  * Context-locking: to facilitate multi-threaded implementations, the following 4 functions
2554  * provide a simple mutex-style host synchronization. If a non-NULL context is specified
2555  * in CUVIDDECODECREATEINFO, the codec library will acquire the mutex associated with the given
2556  * context before making any cuda calls.
2557  * A multi-threaded application could create a lock associated with a context handle so that
2558  * multiple threads can safely share the same cuda context:
2559  *  - use cuCtxPopCurrent immediately after context creation in order to create a 'floating' context
2560  *    that can be passed to cuvidCtxLockCreate.
2561  *  - When using a floating context, all cuda calls should only be made within a cuvidCtxLock/cuvidCtxUnlock section.
2562  *
2563  * NOTE: This is a safer alternative to cuCtxPushCurrent and cuCtxPopCurrent, and is not related to video
2564  * decoder in any way (implemented as a critical section associated with cuCtx{Push|Pop}Current calls).
2565 */
2566 
2567 /**
2568  * \fn CUresult CUDAAPI cuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx)
2569  */
2570 typedef CUresult CUDAAPI tcuvidCtxLockCreate(CUvideoctxlock *pLock, CUcontext ctx);
2571 
2572 /**
2573  * \fn CUresult CUDAAPI cuvidCtxLockDestroy(CUvideoctxlock lck)
2574  */
2575 typedef CUresult CUDAAPI tcuvidCtxLockDestroy(CUvideoctxlock lck);
2576 
2577 /**
2578  * \fn CUresult CUDAAPI cuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags)
2579  */
2580 typedef CUresult CUDAAPI tcuvidCtxLock(CUvideoctxlock lck, unsigned int reserved_flags);
2581 
2582 /**
2583  * \fn CUresult CUDAAPI cuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags)
2584  */
2585 typedef CUresult CUDAAPI tcuvidCtxUnlock(CUvideoctxlock lck, unsigned int reserved_flags);
2586 
2587 /** @} */  /* End VIDEO_DECODER */
2588 ////////////////////////////////////////////////////////////////////////////////////////////////
2589 
2590 
// Decoder entry points, exposed as pointers to the function types declared
// above. They are only declared here; the definitions (and whatever assigns
// them) live outside this header.
extern tcuvidCreateDecoder        *cuvidCreateDecoder;
extern tcuvidDestroyDecoder       *cuvidDestroyDecoder;
extern tcuvidDecodePicture        *cuvidDecodePicture;
extern tcuvidMapVideoFrame        *cuvidMapVideoFrame;
extern tcuvidUnmapVideoFrame      *cuvidUnmapVideoFrame;

// NOTE(review): this guard omits WIN64/_WIN64, which the guard around the
// tcuvidMapVideoFrame64 typedefs accepts; on a target matching only those two
// macros the types exist but these pointers are not declared.
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)
extern tcuvidMapVideoFrame64      *cuvidMapVideoFrame64;
extern tcuvidUnmapVideoFrame64    *cuvidUnmapVideoFrame64;
#endif

//extern tcuvidGetVideoFrameSurface *cuvidGetVideoFrameSurface;

// Context lock entry points
extern tcuvidCtxLockCreate        *cuvidCtxLockCreate;
extern tcuvidCtxLockDestroy       *cuvidCtxLockDestroy;
extern tcuvidCtxLock              *cuvidCtxLock;
extern tcuvidCtxUnlock            *cuvidCtxUnlock;
2608 
2609 ////////////////////////////////////////////////////////////////////////////////////////////////
2610 //
2611 // High-level helper APIs for video sources
2612 //
2613 
typedef void *CUvideosource;        /**< Opaque video source handle (see cuvidCreateVideoSource) */
typedef void *CUvideoparser;        /**< Opaque video parser handle (see cuvidCreateVideoParser) */
typedef long long CUvideotimestamp; /**< Timestamp; units are set by the ulClockRate members (default 10000000Hz per their comments) */
2617 
2618 /**
2619  * \addtogroup VIDEO_PARSER Video Parser
2620  * @{
2621  */
2622 
2623 /*!
2624  * \enum cudaVideoState
2625  * Video Source State
2626  */
2627 typedef enum {
2628     cudaVideoState_Error   = -1,    /**< Error state (invalid source)  */
2629     cudaVideoState_Stopped = 0,     /**< Source is stopped (or reached end-of-stream)  */
2630     cudaVideoState_Started = 1      /**< Source is running and delivering data  */
2631 } cudaVideoState;
2632 
2633 /*!
2634  * \enum cudaAudioCodec
2635  * Audio compression
2636  */
2637 typedef enum {
2638     cudaAudioCodec_MPEG1=0,         /**< MPEG-1 Audio  */
2639     cudaAudioCodec_MPEG2,           /**< MPEG-2 Audio  */
2640     cudaAudioCodec_MP3,             /**< MPEG-1 Layer III Audio  */
2641     cudaAudioCodec_AC3,             /**< Dolby Digital (AC3) Audio  */
2642     cudaAudioCodec_LPCM             /**< PCM Audio  */
2643 } cudaAudioCodec;
2644 
2645 /*!
2646  * \struct CUVIDEOFORMAT
2647  * Video format
2648  */
2649 typedef struct
2650 {
2651     cudaVideoCodec codec;                   /**< Compression format  */
2652    /**
2653     * frame rate = numerator / denominator (for example: 30000/1001)
2654     */
2655     struct {
2656         unsigned int numerator;             /**< frame rate numerator   (0 = unspecified or variable frame rate) */
2657         unsigned int denominator;           /**< frame rate denominator (0 = unspecified or variable frame rate) */
2658     } frame_rate;
2659     unsigned char progressive_sequence;     /**< 0=interlaced, 1=progressive */
2660     unsigned char bit_depth_luma_minus8;    /**< high bit depth Luma */
2661     unsigned char bit_depth_chroma_minus8;  /**< high bit depth Chroma */
2662     unsigned char reserved1;                /**< Reserved for future use */
2663     unsigned int coded_width;               /**< coded frame width */
2664     unsigned int coded_height;              /**< coded frame height  */
2665    /**
2666     *   area of the frame that should be displayed
2667     * typical example:
2668     *   coded_width = 1920, coded_height = 1088
2669     *   display_area = { 0,0,1920,1080 }
2670     */
2671     struct {
2672         int left;                           /**< left position of display rect  */
2673         int top;                            /**< top position of display rect  */
2674         int right;                          /**< right position of display rect  */
2675         int bottom;                         /**< bottom position of display rect  */
2676     } display_area;
2677     cudaVideoChromaFormat chroma_format;    /**<  Chroma format */
2678     unsigned int bitrate;                   /**< video bitrate (bps, 0=unknown) */
2679    /**
2680     * Display Aspect Ratio = x:y (4:3, 16:9, etc)
2681     */
2682     struct {
2683         int x;
2684         int y;
2685     } display_aspect_ratio;
2686     /**
2687     * Video Signal Description
2688     */
2689     struct {
2690         unsigned char video_format          : 3;
2691         unsigned char video_full_range_flag : 1;
2692         unsigned char reserved_zero_bits    : 4;
2693         unsigned char color_primaries;
2694         unsigned char transfer_characteristics;
2695         unsigned char matrix_coefficients;
2696     } video_signal_description;
2697     unsigned int seqhdr_data_length;          /**< Additional bytes following (CUVIDEOFORMATEX)  */
2698 } CUVIDEOFORMAT;
2699 
2700 /*!
2701  * \struct CUVIDEOFORMATEX
2702  * Video format including raw sequence header information
2703  */
2704 typedef struct
2705 {
2706     CUVIDEOFORMAT format;
2707     unsigned char raw_seqhdr_data[1024];
2708 } CUVIDEOFORMATEX;
2709 
2710 /*!
2711  * \struct CUAUDIOFORMAT
2712  * Audio Formats
2713  */
2714 typedef struct
2715 {
2716     cudaAudioCodec codec;       /**< Compression format  */
2717     unsigned int channels;      /**< number of audio channels */
2718     unsigned int samplespersec; /**< sampling frequency */
2719     unsigned int bitrate;       /**< For uncompressed, can also be used to determine bits per sample */
2720     unsigned int reserved1;     /**< Reserved for future use */
2721     unsigned int reserved2;     /**< Reserved for future use */
2722 } CUAUDIOFORMAT;
2723 
2724 
2725 /*!
2726  * \enum CUvideopacketflags
2727  * Data packet flags
2728  */
2729 typedef enum {
2730     CUVID_PKT_ENDOFSTREAM   = 0x01,   /**< Set when this is the last packet for this stream  */
2731     CUVID_PKT_TIMESTAMP     = 0x02,   /**< Timestamp is valid  */
2732     CUVID_PKT_DISCONTINUITY = 0x04    /**< Set when a discontinuity has to be signalled  */
2733 } CUvideopacketflags;
2734 
2735 /*!
2736  * \struct CUVIDSOURCEDATAPACKET
2737  * Data Packet
2738  */
2739 typedef struct _CUVIDSOURCEDATAPACKET
2740 {
2741     unsigned long flags;            /**< Combination of CUVID_PKT_XXX flags */
2742     unsigned long payload_size;     /**< number of bytes in the payload (may be zero if EOS flag is set) */
2743     const unsigned char *payload;   /**< Pointer to packet payload data (may be NULL if EOS flag is set) */
2744     CUvideotimestamp timestamp;     /**< Presentation timestamp (10MHz clock), only valid if CUVID_PKT_TIMESTAMP flag is set */
2745 } CUVIDSOURCEDATAPACKET;
2746 
// Callback for packet delivery
// Used for both CUVIDSOURCEPARAMS::pfnVideoDataHandler and pfnAudioDataHandler.
// First argument: presumably CUVIDSOURCEPARAMS::pUserData ("Parameter passed in
// to the data handlers"); second argument: the packet being delivered.
// The meaning of the int return value is not visible from this header.
typedef int (CUDAAPI *PFNVIDSOURCECALLBACK)(void *, CUVIDSOURCEDATAPACKET *);
2749 
2750 /*!
2751  * \struct CUVIDSOURCEPARAMS
2752  * Source Params
2753  */
2754 typedef struct _CUVIDSOURCEPARAMS
2755 {
2756     unsigned int ulClockRate;                   /**< Timestamp units in Hz (0=default=10000000Hz)  */
2757     unsigned int uReserved1[7];                 /**< Reserved for future use - set to zero  */
2758     void *pUserData;                            /**< Parameter passed in to the data handlers  */
2759     PFNVIDSOURCECALLBACK pfnVideoDataHandler;   /**< Called to deliver audio packets  */
2760     PFNVIDSOURCECALLBACK pfnAudioDataHandler;   /**< Called to deliver video packets  */
2761     void *pvReserved2[8];                       /**< Reserved for future use - set to NULL */
2762 } CUVIDSOURCEPARAMS;
2763 
2764 /*!
2765  * \enum CUvideosourceformat_flags
2766  * CUvideosourceformat_flags
2767  */
2768 typedef enum {
2769     CUVID_FMT_EXTFORMATINFO = 0x100             /**< Return extended format structure (CUVIDEOFORMATEX) */
2770 } CUvideosourceformat_flags;
2771 
// Video source API: function types for the file-based source helpers.
// The whole group is compiled out when __APPLE__ is defined.
#if !defined(__APPLE__)
/**
 * \fn CUresult CUDAAPI cuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams)
 * Create Video Source
 *
 * \param pObj        presumably receives the new source handle -- TODO confirm
 * \param pszFileName file name as a char string
 * \param pParams     source parameters (clock rate, user data, packet handlers)
 */
typedef CUresult CUDAAPI tcuvidCreateVideoSource(CUvideosource *pObj, const char *pszFileName, CUVIDSOURCEPARAMS *pParams);

/**
 * \fn CUresult CUDAAPI cuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams)
 * Create Video Source
 *
 * Same as cuvidCreateVideoSource but takes the file name as a wchar_t string.
 */
typedef CUresult CUDAAPI tcuvidCreateVideoSourceW(CUvideosource *pObj, const wchar_t *pwszFileName, CUVIDSOURCEPARAMS *pParams);

/**
 * \fn CUresult CUDAAPI cuvidDestroyVideoSource(CUvideosource obj)
 * Destroy Video Source
 */
typedef CUresult CUDAAPI tcuvidDestroyVideoSource(CUvideosource obj);

/**
 * \fn CUresult CUDAAPI cuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state)
 * Set Video Source state
 */
typedef CUresult CUDAAPI tcuvidSetVideoSourceState(CUvideosource obj, cudaVideoState state);

/**
 * \fn cudaVideoState CUDAAPI cuvidGetVideoSourceState(CUvideosource obj)
 * Get Video Source state
 *
 * Note that this one returns the state itself rather than a CUresult.
 */
typedef cudaVideoState CUDAAPI tcuvidGetVideoSourceState(CUvideosource obj);

/**
 * \fn CUresult CUDAAPI cuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags)
 * Get Video Source Format
 *
 * \param flags CUvideosourceformat_flags; CUVID_FMT_EXTFORMATINFO asks for the
 *              extended structure, so pvidfmt presumably has to point to a
 *              CUVIDEOFORMATEX in that case -- TODO confirm
 */
typedef CUresult CUDAAPI tcuvidGetSourceVideoFormat(CUvideosource obj, CUVIDEOFORMAT *pvidfmt, unsigned int flags);

/**
 * \fn CUresult CUDAAPI cuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags)
 * Get Audio Source Format
 */
typedef CUresult CUDAAPI tcuvidGetSourceAudioFormat(CUvideosource obj, CUAUDIOFORMAT *paudfmt, unsigned int flags);

#endif
2816 
2817 /**
2818  * \struct CUVIDPARSERDISPINFO
2819  */
2820 typedef struct _CUVIDPARSERDISPINFO
2821 {
2822     int picture_index;         /**<                 */
2823     int progressive_frame;     /**<                 */
2824     int top_field_first;       /**<                 */
2825     int repeat_first_field;    /**< Number of additional fields (1=ivtc, 2=frame doubling, 4=frame tripling, -1=unpaired field)  */
2826     CUvideotimestamp timestamp; /**<     */
2827 } CUVIDPARSERDISPINFO;
2828 
2829 //
2830 // Parser callbacks
2831 // The parser will call these synchronously from within cuvidParseVideoData(), whenever a picture is ready to
2832 // be decoded and/or displayed.
2833 //
2834 typedef int (CUDAAPI *PFNVIDSEQUENCECALLBACK)(void *, CUVIDEOFORMAT *);
2835 typedef int (CUDAAPI *PFNVIDDECODECALLBACK)(void *, CUVIDPICPARAMS *);
2836 typedef int (CUDAAPI *PFNVIDDISPLAYCALLBACK)(void *, CUVIDPARSERDISPINFO *);
2837 
2838 /**
2839  * \struct CUVIDPARSERPARAMS
2840  */
2841 typedef struct _CUVIDPARSERPARAMS
2842 {
2843     cudaVideoCodec CodecType;               /**< cudaVideoCodec_XXX  */
2844     unsigned int ulMaxNumDecodeSurfaces;    /**< Max # of decode surfaces (parser will cycle through these) */
2845     unsigned int ulClockRate;               /**< Timestamp units in Hz (0=default=10000000Hz) */
2846     unsigned int ulErrorThreshold;          /**< % Error threshold (0-100) for calling pfnDecodePicture (100=always call pfnDecodePicture even if picture bitstream is fully corrupted) */
2847     unsigned int ulMaxDisplayDelay;         /**< Max display queue delay (improves pipelining of decode with display) - 0=no delay (recommended values: 2..4) */
2848     unsigned int uReserved1[5];             /**< Reserved for future use - set to 0 */
2849     void *pUserData;                        /**< User data for callbacks */
2850     PFNVIDSEQUENCECALLBACK pfnSequenceCallback; /**< Called before decoding frames and/or whenever there is a format change */
2851     PFNVIDDECODECALLBACK pfnDecodePicture;      /**< Called when a picture is ready to be decoded (decode order) */
2852     PFNVIDDISPLAYCALLBACK pfnDisplayPicture;    /**< Called whenever a picture is ready to be displayed (display order)  */
2853     void *pvReserved2[7];                       /**< Reserved for future use - set to NULL */
2854     CUVIDEOFORMATEX *pExtVideoInfo;             /**< [Optional] sequence header data from system layer */
2855 } CUVIDPARSERPARAMS;
2856 
2857 /**
2858  * \fn CUresult CUDAAPI cuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams)
2859  */
2860 typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser *pObj, CUVIDPARSERPARAMS *pParams);
2861 
2862 /**
2863  * \fn CUresult CUDAAPI cuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket)
2864  */
2865 typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser obj, CUVIDSOURCEDATAPACKET *pPacket);
2866 
2867 /**
2868  * \fn CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser obj)
2869  */
2870 typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser obj);
2871 
// Video source entry points, exposed as pointers to the function types
// declared above; like those types they are compiled out when __APPLE__ is
// defined. The definitions live outside this header.
#if !defined(__APPLE__)
extern tcuvidCreateVideoSource               *cuvidCreateVideoSource;
extern tcuvidCreateVideoSourceW              *cuvidCreateVideoSourceW;
extern tcuvidDestroyVideoSource              *cuvidDestroyVideoSource;
extern tcuvidSetVideoSourceState             *cuvidSetVideoSourceState;
extern tcuvidGetVideoSourceState             *cuvidGetVideoSourceState;
extern tcuvidGetSourceVideoFormat            *cuvidGetSourceVideoFormat;
extern tcuvidGetSourceAudioFormat            *cuvidGetSourceAudioFormat;
#endif


// Video parser entry points (declared on every platform this header supports)
extern tcuvidCreateVideoParser               *cuvidCreateVideoParser;
extern tcuvidParseVideoData                  *cuvidParseVideoData;
extern tcuvidDestroyVideoParser              *cuvidDestroyVideoParser;
2886 
2887 /** @} */  /* END VIDEO_PARSER */
2888 ////////////////////////////////////////////////////////////////////////////////////////////////
2889 
// Presumably returns a printable name for the given CUresult (going by the
// name and the const char * return type); the definition is not in this header.
const char *cudaGetErrorEnum(CUresult error);

#ifdef __cplusplus
}
#endif

#endif // defined(WIN32) || defined(GPAC_CONFIG_LINUX) || defined(GPAC_CONFIG_DARWIN)

#endif //__cuda_tools_h__
2899 
2900