1"""
2Enum values for CUDA driver
3"""
4
5
# CUresult status codes of the CUDA driver API.
CUDA_SUCCESS                              = 0
CUDA_ERROR_INVALID_VALUE                  = 1
CUDA_ERROR_OUT_OF_MEMORY                  = 2
CUDA_ERROR_NOT_INITIALIZED                = 3
CUDA_ERROR_DEINITIALIZED                  = 4
CUDA_ERROR_PROFILER_DISABLED              = 5
CUDA_ERROR_PROFILER_NOT_INITIALIZED       = 6
CUDA_ERROR_PROFILER_ALREADY_STARTED       = 7
CUDA_ERROR_PROFILER_ALREADY_STOPPED       = 8
CUDA_ERROR_NO_DEVICE                      = 100
CUDA_ERROR_INVALID_DEVICE                 = 101
CUDA_ERROR_INVALID_IMAGE                  = 200
CUDA_ERROR_INVALID_CONTEXT                = 201
CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202
CUDA_ERROR_MAP_FAILED                     = 205
CUDA_ERROR_UNMAP_FAILED                   = 206
CUDA_ERROR_ARRAY_IS_MAPPED                = 207
CUDA_ERROR_ALREADY_MAPPED                 = 208
CUDA_ERROR_NO_BINARY_FOR_GPU              = 209
CUDA_ERROR_ALREADY_ACQUIRED               = 210
CUDA_ERROR_NOT_MAPPED                     = 211
CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212
CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213
CUDA_ERROR_ECC_UNCORRECTABLE              = 214
CUDA_ERROR_UNSUPPORTED_LIMIT              = 215
CUDA_ERROR_CONTEXT_ALREADY_IN_USE         = 216
CUDA_ERROR_INVALID_SOURCE                 = 300
CUDA_ERROR_FILE_NOT_FOUND                 = 301
CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302
CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303
CUDA_ERROR_OPERATING_SYSTEM               = 304
CUDA_ERROR_INVALID_HANDLE                 = 400
CUDA_ERROR_NOT_FOUND                      = 500
CUDA_ERROR_NOT_READY                      = 600
# Code 700 is the driver's illegal-address error.  It was previously bound to
# the name CUDA_ERROR_LAUNCH_FAILED, which is assigned again further down with
# the value 719; the later assignment won, so 700 ended up with no name at all.
CUDA_ERROR_ILLEGAL_ADDRESS                = 700
CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701
CUDA_ERROR_LAUNCH_TIMEOUT                 = 702
CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703
CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED    = 704
CUDA_ERROR_PEER_ACCESS_NOT_ENABLED        = 705
CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE         = 708
CUDA_ERROR_CONTEXT_IS_DESTROYED           = 709
CUDA_ERROR_ASSERT                         = 710
CUDA_ERROR_TOO_MANY_PEERS                 = 711
CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712
CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED     = 713
CUDA_ERROR_HARDWARE_STACK_ERROR           = 714
CUDA_ERROR_ILLEGAL_INSTRUCTION            = 715
CUDA_ERROR_MISALIGNED_ADDRESS             = 716
CUDA_ERROR_INVALID_ADDRESS_SPACE          = 717
CUDA_ERROR_INVALID_PC                     = 718
CUDA_ERROR_LAUNCH_FAILED                  = 719
CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE   = 720
CUDA_ERROR_NOT_PERMITTED                  = 800
CUDA_ERROR_NOT_SUPPORTED                  = 801
CUDA_ERROR_UNKNOWN                        = 999
62
63
# CU_FUNC_CACHE_PREFER_*: preferred split between shared memory and L1 cache.
CU_FUNC_CACHE_PREFER_NONE = 0    # no preference for shared memory or L1 (default)
CU_FUNC_CACHE_PREFER_SHARED = 1  # larger shared memory, smaller L1 cache
CU_FUNC_CACHE_PREFER_L1 = 2      # larger L1 cache, smaller shared memory
CU_FUNC_CACHE_PREFER_EQUAL = 3   # equally sized L1 cache and shared memory
72
# CU_CTX_* flag bits.

# Scheduling policy.
CU_CTX_SCHED_AUTO = 0                # automatic scheduling
CU_CTX_SCHED_SPIN = 1 << 0           # spin as default scheduling
CU_CTX_SCHED_YIELD = 1 << 1          # yield as default scheduling
CU_CTX_SCHED_BLOCKING_SYNC = 1 << 2  # blocking synchronization as default scheduling

# Union of the scheduling bits above (0x07).
CU_CTX_SCHED_MASK = (CU_CTX_SCHED_SPIN
                     | CU_CTX_SCHED_YIELD
                     | CU_CTX_SCHED_BLOCKING_SYNC)

CU_CTX_MAP_HOST = 1 << 3             # support mapped pinned allocations
CU_CTX_LMEM_RESIZE_TO_MAX = 1 << 4   # keep local memory allocation after launch

# Union of every flag bit defined in this group (0x1f).
CU_CTX_FLAGS_MASK = (CU_CTX_SCHED_MASK
                     | CU_CTX_MAP_HOST
                     | CU_CTX_LMEM_RESIZE_TO_MAX)
90
91
92
# ---- Flags for cuMemHostAlloc() ----

# Host memory is portable between CUDA contexts.
CU_MEMHOSTALLOC_PORTABLE = 1 << 0

# Host memory is mapped into the CUDA address space, and
# cuMemHostGetDevicePointer() may be called on the host pointer.
CU_MEMHOSTALLOC_DEVICEMAP = 1 << 1

# Host memory is allocated as write-combined: fast to write, faster to DMA,
# slow to read except via the SSE4 streaming load instruction (MOVNTDQA).
CU_MEMHOSTALLOC_WRITECOMBINED = 1 << 2

# ---- Flags for cuMemHostRegister() ----

# Host memory is portable between CUDA contexts.
CU_MEMHOSTREGISTER_PORTABLE = 1 << 0

# Host memory is mapped into the CUDA address space, and
# cuMemHostGetDevicePointer() may be called on the host pointer.
CU_MEMHOSTREGISTER_DEVICEMAP = 1 << 1
116
117
# CU_EVENT_* flags.
CU_EVENT_DEFAULT = 0              # default event flag
CU_EVENT_BLOCKING_SYNC = 1 << 0   # event uses blocking synchronization
CU_EVENT_DISABLE_TIMING = 1 << 1  # event will not record timing data
# Event is suitable for interprocess use; CU_EVENT_DISABLE_TIMING must be set
# together with this flag.
CU_EVENT_INTERPROCESS = 1 << 2
126
# CU_POINTER_ATTRIBUTE_*: selectors for the properties of a pointer.

# CUcontext on which the pointer was allocated or registered.
CU_POINTER_ATTRIBUTE_CONTEXT = 0x1
# CUmemorytype describing the physical location of the pointer.
CU_POINTER_ATTRIBUTE_MEMORY_TYPE = 0x2
# Address at which the pointer's memory may be accessed on the device.
CU_POINTER_ATTRIBUTE_DEVICE_POINTER = 0x3
# Address at which the pointer's memory may be accessed on the host.
CU_POINTER_ATTRIBUTE_HOST_POINTER = 0x4
# Pair of tokens for use with the nv-p2p.h Linux kernel interface.
CU_POINTER_ATTRIBUTE_P2P_TOKENS = 0x5
137
# CU_MEMORYTYPE_*: kinds of memory.
CU_MEMORYTYPE_HOST = 1     # host memory
CU_MEMORYTYPE_DEVICE = 2   # device memory
CU_MEMORYTYPE_ARRAY = 3    # array memory
CU_MEMORYTYPE_UNIFIED = 4  # unified device or host memory
146
147
148
# CU_JIT_INPUT_*: kinds of input handed to the JIT, each listed with the
# options that apply to it.

# Compiled device-class-specific device code
# Applicable options: none
CU_JIT_INPUT_CUBIN = 0

# PTX source code
# Applicable options: PTX compiler options
CU_JIT_INPUT_PTX = 1

# Bundle of multiple cubins and/or PTX of some device code
# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
CU_JIT_INPUT_FATBINARY = 2
# Truncated spelling that this module historically exported; kept as an alias
# so existing references to it keep resolving to the same value.
CU_JIT_INPUT_FATBINAR = CU_JIT_INPUT_FATBINARY

# Host object with embedded device code
# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
CU_JIT_INPUT_OBJECT = 3

# Archive of host objects with embedded device code
# Applicable options: PTX compiler options, ::CU_JIT_FALLBACK_STRATEGY
CU_JIT_INPUT_LIBRARY = 4
168
169
170
# CU_JIT_* option identifiers.
# Each entry states the type of the option value and whether the option
# applies to the compiler only or to both the compiler and the linker.

# Maximum number of registers that a thread may use.
# Value type: unsigned int.  Applies to: compiler only.
CU_JIT_MAX_REGISTERS = 0

# IN: minimum number of threads per block to target compilation for.
# OUT: the number of threads the compiler actually targeted.
# Restricts the resource utilization of the compiler (e.g. max registers) so
# that a block with the given number of threads should be able to launch
# based on register limitations.  Other resource limitations, such as shared
# memory utilization, are currently not taken into account.
# Cannot be combined with ::CU_JIT_TARGET.
# Value type: unsigned int.  Applies to: compiler only.
CU_JIT_THREADS_PER_BLOCK = 1

# The option value is overwritten with the total wall clock time, in
# milliseconds, spent in the compiler and linker.
# Value type: float.  Applies to: compiler and linker.
CU_JIT_WALL_TIME = 2

# Pointer to a buffer that receives informational log messages; its size is
# given through ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES.
# Value type: char *.  Applies to: compiler and linker.
CU_JIT_INFO_LOG_BUFFER = 3

# IN: size of the info log buffer in bytes; messages are capped at this size
# (including the null terminator).
# OUT: amount of the log buffer filled with messages.
# Value type: unsigned int.  Applies to: compiler and linker.
CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES = 4

# Pointer to a buffer that receives log messages reflecting errors; its size
# is given through ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES.
# Value type: char *.  Applies to: compiler and linker.
CU_JIT_ERROR_LOG_BUFFER = 5

# IN: size of the error log buffer in bytes; messages are capped at this size
# (including the null terminator).
# OUT: amount of the log buffer filled with messages.
# Value type: unsigned int.  Applies to: compiler and linker.
CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES = 6

# Level of optimizations to apply to generated code (0 - 4); 4 is both the
# default and the highest level.
# Value type: unsigned int.  Applies to: compiler only.
CU_JIT_OPTIMIZATION_LEVEL = 7

# Determine the target from the currently attached context (default).
# Value type: no option value needed.  Applies to: compiler and linker.
CU_JIT_TARGET_FROM_CUCONTEXT = 8

# Choose the target from a supplied ::CUjit_target.
# Cannot be combined with ::CU_JIT_THREADS_PER_BLOCK.
# Value type: unsigned int for enumerated type ::CUjit_target.
# Applies to: compiler and linker.
CU_JIT_TARGET = 9

# Fallback strategy to use if a matching cubin is not found, chosen from a
# supplied ::CUjit_fallback.
# Value type: unsigned int for enumerated type ::CUjit_fallback.
# Applies to: compiler only.
CU_JIT_FALLBACK_STRATEGY = 10

# Whether to create debug information in the output (-g); 0 means false and
# is the default.
# Value type: int.  Applies to: compiler and linker.
CU_JIT_GENERATE_DEBUG_INFO = 11

# Generate verbose log messages; 0 means false and is the default.
# Value type: int.  Applies to: compiler and linker.
CU_JIT_LOG_VERBOSE = 12

# Generate line number information (-lineinfo); 0 means false and is the
# default.
# Value type: int.  Applies to: compiler only.
CU_JIT_GENERATE_LINE_INFO = 13

# Whether to enable caching explicitly (-dlcm), chosen from a supplied
# ::CUjit_cacheMode_enum.
# Value type: unsigned int for enumerated type ::CUjit_cacheMode_enum.
# Applies to: compiler only.
CU_JIT_CACHE_MODE = 14
297
298
# Device attributes
#
# Numeric identifiers of the CU_DEVICE_ATTRIBUTE_* device properties.
# NOTE(review): the values 44 and 92-94 have no name in this list.


CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3
CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6
CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8
CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9
CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10
CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12
CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13
CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14
CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16
CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17
CU_DEVICE_ATTRIBUTE_INTEGRATED = 18
CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19
CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_WIDTH = 21
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_WIDTH = 22
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_HEIGHT = 23
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH = 24
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT = 25
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH = 26
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_WIDTH = 27
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_HEIGHT = 28
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LAYERED_LAYERS = 29
CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30
CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31
CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32
CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33
CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34
CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37
CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTI_PROCESSOR = 39
CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40
CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_WIDTH = 42
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LAYERED_LAYERS = 43
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_WIDTH = 45
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_GATHER_HEIGHT = 46
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_WIDTH_ALT = 47
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_HEIGHT_ALT = 48
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_3D_DEPTH_ALT = 49
CU_DEVICE_ATTRIBUTE_PCI_DOMAIN_ID = 50
CU_DEVICE_ATTRIBUTE_TEXTURE_PITCH_ALIGNMENT = 51
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_WIDTH = 52
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_WIDTH = 53
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_CUBEMAP_LAYERED_LAYERS = 54
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_WIDTH = 55
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_WIDTH = 56
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_HEIGHT = 57
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_WIDTH = 58
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_HEIGHT = 59
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_3D_DEPTH = 60
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_WIDTH = 61
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_1D_LAYERED_LAYERS = 62
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_WIDTH = 63
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_HEIGHT = 64
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_2D_LAYERED_LAYERS = 65
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_WIDTH = 66
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_WIDTH = 67
CU_DEVICE_ATTRIBUTE_MAX_SURFACE_CUBEMAP_LAYERED_LAYERS = 68
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_LINEAR_WIDTH = 69
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_WIDTH = 70
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_HEIGHT = 71
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_LINEAR_PITCH = 72
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_MIPMAPPED_WIDTH = 73
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_MIPMAPPED_HEIGHT = 74
# Historical spelling with a doubled "MAX_", inconsistent with the
# ..._MIPMAPPED_WIDTH name above; kept as an alias so existing references to
# it keep resolving to the same value.
CU_DEVICE_ATTRIBUTE_MAX_MAX_TEXTURE_2D_MIPMAPPED_HEIGHT = (
    CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_2D_MIPMAPPED_HEIGHT)
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
CU_DEVICE_ATTRIBUTE_MAX_TEXTURE_1D_MIPMAPPED_WIDTH = 77
CU_DEVICE_ATTRIBUTE_STREAM_PRIORITIES_SUPPORTED = 78
CU_DEVICE_ATTRIBUTE_GLOBAL_L1_CACHE_SUPPORTED = 79
CU_DEVICE_ATTRIBUTE_LOCAL_L1_CACHE_SUPPORTED = 80
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR = 81
CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_MULTIPROCESSOR = 82
CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY = 83
CU_DEVICE_ATTRIBUTE_IS_MULTI_GPU_BOARD = 84
CU_DEVICE_ATTRIBUTE_MULTI_GPU_BOARD_GROUP_ID = 85
CU_DEVICE_ATTRIBUTE_HOST_NATIVE_ATOMIC_SUPPORTED = 86
CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO = 87
CU_DEVICE_ATTRIBUTE_PAGEABLE_MEMORY_ACCESS = 88
CU_DEVICE_ATTRIBUTE_CONCURRENT_MANAGED_ACCESS = 89
CU_DEVICE_ATTRIBUTE_COMPUTE_PREEMPTION_SUPPORTED = 90
CU_DEVICE_ATTRIBUTE_CAN_USE_HOST_POINTER_FOR_REGISTERED_MEM = 91
CU_DEVICE_ATTRIBUTE_COOPERATIVE_LAUNCH = 95
CU_DEVICE_ATTRIBUTE_COOPERATIVE_MULTI_DEVICE_LAUNCH = 96
CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN = 97
395
396
# CUfunction_attribute
#
# Selectors for the per-function properties described below.

# Maximum number of threads per block; a launch of the function with more
# would fail.  Depends on both the function and the device on which the
# function is currently loaded.
CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0x0

# Size in bytes of the statically-allocated shared memory the function
# requires.  Dynamically-allocated shared memory requested by the user at
# runtime is not included.
CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 0x1

# Size in bytes of the user-allocated constant memory the function requires.
CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 0x2

# Size in bytes of the local memory used by each thread of the function.
CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 0x3

# Number of registers used by each thread of the function.
CU_FUNC_ATTRIBUTE_NUM_REGS = 0x4

# PTX virtual architecture version the function was compiled for, encoded as
# major * 10 + minor (PTX version 1.3 gives 13).  May be the undefined value
# 0 for cubins compiled prior to CUDA 3.0.
CU_FUNC_ATTRIBUTE_PTX_VERSION = 0x5

# Binary architecture version the function was compiled for, encoded as
# major * 10 + minor (binary version 1.3 gives 13).  Legacy cubins that lack
# a properly-encoded binary architecture version give 10.
CU_FUNC_ATTRIBUTE_BINARY_VERSION = 0x6
432