/*
 * This copyright notice applies to this header file only:
 *
 * Copyright (c) 2016
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the software, and to permit persons to whom the
 * software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef __cuda_cuda_h__
#define __cuda_cuda_h__

#include <stddef.h>

#define CUDA_VERSION 7050

/*
 * API version that selects the version-gated declarations below.
 *
 * It defaults to CUDA_VERSION. Code that needs the older declarations can
 * define __CUDA_API_VERSION before including this header. Without this
 * default the macro is undefined, evaluates to 0 inside #if, and the
 * pre-4.0 variants get selected silently (e.g. a CU_CTX_SCHED_MASK that does
 * not cover CU_CTX_SCHED_BLOCKING_SYNC).
 */
#ifndef __CUDA_API_VERSION
#define __CUDA_API_VERSION CUDA_VERSION
#endif

/* Calling convention decoration for driver API entry points. */
#if defined(_WIN32) || defined(__CYGWIN__)
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif

typedef int CUdevice;                                     /**< CUDA device */
typedef struct CUarray_st *CUarray;                       /**< CUDA array */
typedef struct CUctx_st *CUcontext;                       /**< CUDA context */

/*
 * Device pointer: 64 bits wide on every 64-bit target, 32 bits otherwise.
 * The generic _WIN64 / __LP64__ / __aarch64__ checks are needed in addition
 * to the x86-64 specific macros; otherwise 64-bit non-x86 builds would end
 * up with a truncated 32-bit device pointer type.
 */
#if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || \
    defined(_WIN64) || defined(__LP64__) || defined(__aarch64__)
typedef unsigned long long CUdeviceptr;
#else
typedef unsigned int CUdeviceptr;
#endif

typedef struct CUstream_st *CUstream;                     /**< CUDA stream */
typedef struct CUgraphicsResource_st *CUgraphicsResource; /**< CUDA graphics interop resource */

/**
 * Context creation flags
 */
typedef enum CUctx_flags_enum
{
    CU_CTX_SCHED_AUTO          = 0x00, /**< Automatic scheduling */
    CU_CTX_SCHED_SPIN          = 0x01, /**< Set spin as default scheduling */
    CU_CTX_SCHED_YIELD         = 0x02, /**< Set yield as default scheduling */
    CU_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */
    CU_CTX_BLOCKING_SYNC       = 0x04, /**< Set blocking synchronization as default scheduling \deprecated */
    CU_CTX_MAP_HOST            = 0x08, /**< Support mapped pinned allocations */
    CU_CTX_LMEM_RESIZE_TO_MAX  = 0x10, /**< Keep local memory allocation after launch */
#if __CUDA_API_VERSION < 4000
    CU_CTX_SCHED_MASK          = 0x03,
    CU_CTX_FLAGS_MASK          = 0x1f
#else
    CU_CTX_SCHED_MASK          = 0x07,
    CU_CTX_PRIMARY             = 0x20, /**< Initialize and return the primary context */
    CU_CTX_FLAGS_MASK          = 0x3f
#endif
} CUctx_flags;

/**
 * Stream creation flags
 */
typedef enum CUstream_flags_enum {
    CU_STREAM_DEFAULT      = 0x0, /**< Default stream flag */
    CU_STREAM_NON_BLOCKING = 0x1  /**< Stream does not synchronize with stream 0 (the NULL stream) */
} CUstream_flags;

/**
 * Device properties
 */
typedef enum CUdevice_attribute_enum
{
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,              /**< Maximum number of threads per block */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,                    /**< Maximum block dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,                    /**< Maximum block dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,                    /**< Maximum block dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,                     /**< Maximum grid dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,                     /**< Maximum grid dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,                     /**< Maximum grid dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,        /**< Maximum shared memory available per block in bytes */
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,            /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,              /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,                         /**< Warp size in threads */
    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,                         /**< Maximum pitch in bytes allowed by memory copies */
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,           /**< Maximum number of 32-bit registers available per block */
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,               /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,                        /**< Peak clock frequency in kilohertz */
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,                 /**< Alignment requirement for textures */
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,                       /**< Device can possibly copy memory and execute a kernel concurrently */
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,              /**< Number of multiprocessors on device */
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,               /**< Specifies whether there is a run time limit on kernels */
    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,                        /**< Device is integrated with host memory */
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,               /**< Device can map host memory into CUDA address space */
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,                      /**< Compute mode (See ::CUcomputemode for details) */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,           /**< Maximum 1D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,           /**< Maximum 2D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,          /**< Maximum 2D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,           /**< Maximum 3D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,          /**< Maximum 3D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,           /**< Maximum 3D texture depth */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,     /**< Maximum texture array width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,    /**< Maximum texture array height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Maximum slices in a texture array */
    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,                 /**< Alignment requirement for surfaces */
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,                /**< Device can possibly execute multiple kernels concurrently */
    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,                       /**< Device has ECC support enabled */
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,                        /**< PCI bus ID of the device */
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,                     /**< PCI device ID of the device */
    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35                         /**< Device is using TCC driver model */
#if __CUDA_API_VERSION >= 4000
  , CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,                 /**< Peak memory clock frequency in kilohertz */
    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,           /**< Global memory bus width in bits */
    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,                     /**< Size of L2 cache in bytes */
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,    /**< Maximum resident threads per multiprocessor */
    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,                /**< Number of asynchronous engines */
    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,                /**< Device shares a unified address space with the host */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,   /**< Maximum 1D layered texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43   /**< Maximum layers in a 1D layered texture */
#endif
} CUdevice_attribute;


/**
 * Error codes
 */
typedef enum cudaError_enum
{
    /**
     * The API call returned with no errors. In the case of query calls, this
     * can also mean that the operation being queried is complete (see
     * ::cuEventQuery() and ::cuStreamQuery()).
     */
    CUDA_SUCCESS                              = 0,

    /**
     * This indicates that one or more of the parameters passed to the API call
     * is not within an acceptable range of values.
     */
    CUDA_ERROR_INVALID_VALUE                  = 1,

    /**
     * The API call failed because it was unable to allocate enough memory to
     * perform the requested operation.
     */
    CUDA_ERROR_OUT_OF_MEMORY                  = 2,

    /**
     * This indicates that the CUDA driver has not been initialized with
     * ::cuInit() or that initialization has failed.
     */
    CUDA_ERROR_NOT_INITIALIZED                = 3,

    /**
     * This indicates that the CUDA driver is in the process of shutting down.
     */
    CUDA_ERROR_DEINITIALIZED                  = 4,

    /**
     * This indicates profiling APIs are called while application is running
     * in visual profiler mode.
     */
    CUDA_ERROR_PROFILER_DISABLED              = 5,
    /**
     * This indicates profiling has not been initialized for this context.
     * Call cuProfilerInitialize() to resolve this.
     */
    CUDA_ERROR_PROFILER_NOT_INITIALIZED       = 6,
    /**
     * This indicates profiler has already been started and probably
     * cuProfilerStart() is incorrectly called.
     */
    CUDA_ERROR_PROFILER_ALREADY_STARTED       = 7,
    /**
     * This indicates profiler has already been stopped and probably
     * cuProfilerStop() is incorrectly called.
     */
    CUDA_ERROR_PROFILER_ALREADY_STOPPED       = 8,
    /**
     * This indicates that no CUDA-capable devices were detected by the installed
     * CUDA driver.
     */
    CUDA_ERROR_NO_DEVICE                      = 100,

    /**
     * This indicates that the device ordinal supplied by the user does not
     * correspond to a valid CUDA device.
     */
    CUDA_ERROR_INVALID_DEVICE                 = 101,


    /**
     * This indicates that the device kernel image is invalid. This can also
     * indicate an invalid CUDA module.
     */
    CUDA_ERROR_INVALID_IMAGE                  = 200,

    /**
     * This most frequently indicates that there is no context bound to the
     * current thread. This can also be returned if the context passed to an
     * API call is not a valid handle (such as a context that has had
     * ::cuCtxDestroy() invoked on it). This can also be returned if a user
     * mixes different API versions (i.e. 3010 context with 3020 API calls).
     * See ::cuCtxGetApiVersion() for more details.
     */
    CUDA_ERROR_INVALID_CONTEXT                = 201,

    /**
     * This indicated that the context being supplied as a parameter to the
     * API call was already the active context.
     * \deprecated
     * This error return is deprecated as of CUDA 3.2. It is no longer an
     * error to attempt to push the active context via ::cuCtxPushCurrent().
     */
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,

    /**
     * This indicates that a map or register operation has failed.
     */
    CUDA_ERROR_MAP_FAILED                     = 205,

    /**
     * This indicates that an unmap or unregister operation has failed.
     */
    CUDA_ERROR_UNMAP_FAILED                   = 206,

    /**
     * This indicates that the specified array is currently mapped and thus
     * cannot be destroyed.
     */
    CUDA_ERROR_ARRAY_IS_MAPPED                = 207,

    /**
     * This indicates that the resource is already mapped.
     */
    CUDA_ERROR_ALREADY_MAPPED                 = 208,

    /**
     * This indicates that there is no kernel image available that is suitable
     * for the device. This can occur when a user specifies code generation
     * options for a particular CUDA source file that do not include the
     * corresponding device configuration.
     */
    CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,

    /**
     * This indicates that a resource has already been acquired.
     */
    CUDA_ERROR_ALREADY_ACQUIRED               = 210,

    /**
     * This indicates that a resource is not mapped.
     */
    CUDA_ERROR_NOT_MAPPED                     = 211,

    /**
     * This indicates that a mapped resource is not available for access as an
     * array.
     */
    CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,

    /**
     * This indicates that a mapped resource is not available for access as a
     * pointer.
     */
    CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,

    /**
     * This indicates that an uncorrectable ECC error was detected during
     * execution.
     */
    CUDA_ERROR_ECC_UNCORRECTABLE              = 214,

    /**
     * This indicates that the ::CUlimit passed to the API call is not
     * supported by the active device.
     */
    CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,

    /**
     * This indicates that the ::CUcontext passed to the API call can
     * only be bound to a single CPU thread at a time but is already
     * bound to a CPU thread.
     */
    CUDA_ERROR_CONTEXT_ALREADY_IN_USE         = 216,

    /**
     * This indicates that the device kernel source is invalid.
     */
    CUDA_ERROR_INVALID_SOURCE                 = 300,

    /**
     * This indicates that the file specified was not found.
     */
    CUDA_ERROR_FILE_NOT_FOUND                 = 301,

    /**
     * This indicates that a link to a shared object failed to resolve.
     */
    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,

    /**
     * This indicates that initialization of a shared object failed.
     */
    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,

    /**
     * This indicates that an OS call failed.
     */
    CUDA_ERROR_OPERATING_SYSTEM               = 304,


    /**
     * This indicates that a resource handle passed to the API call was not
     * valid. Resource handles are opaque types like ::CUstream and ::CUevent.
     */
    CUDA_ERROR_INVALID_HANDLE                 = 400,


    /**
     * This indicates that a named symbol was not found. Examples of symbols
     * are global/constant variable names, texture names, and surface names.
     */
    CUDA_ERROR_NOT_FOUND                      = 500,


    /**
     * This indicates that asynchronous operations issued previously have not
     * completed yet. This result is not actually an error, but must be indicated
     * differently than ::CUDA_SUCCESS (which indicates completion). Calls that
     * may return this value include ::cuEventQuery() and ::cuStreamQuery().
     */
    CUDA_ERROR_NOT_READY                      = 600,


    /**
     * An exception occurred on the device while executing a kernel. Common
     * causes include dereferencing an invalid device pointer and accessing
     * out of bounds shared memory. The context cannot be used, so it must
     * be destroyed (and a new one should be created). All existing device
     * memory allocations from this context are invalid and must be
     * reconstructed if the program is to continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_FAILED                  = 700,

    /**
     * This indicates that a launch did not occur because it did not have
     * appropriate resources. This error usually indicates that the user has
     * attempted to pass too many arguments to the device kernel, or the
     * kernel launch specifies too many threads for the kernel's register
     * count. Passing arguments of the wrong size (i.e. a 64-bit pointer
     * when a 32-bit int is expected) is equivalent to passing too many
     * arguments and can also result in this error.
     */
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,

    /**
     * This indicates that the device kernel took too long to execute. This can
     * only occur if timeouts are enabled - see the device attribute
     * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The
     * context cannot be used (and must be destroyed similar to
     * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from
     * this context are invalid and must be reconstructed if the program is to
     * continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,

    /**
     * This error indicates a kernel launch that uses an incompatible texturing
     * mode.
     */
    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,

    /**
     * This error indicates that a call to ::cuCtxEnablePeerAccess() is
     * trying to re-enable peer access to a context which has already
     * had peer access to it enabled.
     */
    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED    = 704,

    /**
     * This error indicates that a call to ::cuMemPeerRegister is trying to
     * register memory from a context which has not had peer access
     * enabled yet via ::cuCtxEnablePeerAccess(), or that
     * ::cuCtxDisablePeerAccess() is trying to disable peer access
     * which has not been enabled yet.
     */
    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED        = 705,

    /**
     * This error indicates that a call to ::cuMemPeerRegister is trying to
     * register already-registered memory.
     */
    CUDA_ERROR_PEER_MEMORY_ALREADY_REGISTERED = 706,

    /**
     * This error indicates that a call to ::cuMemPeerUnregister is trying to
     * unregister memory that has not been registered.
     */
    CUDA_ERROR_PEER_MEMORY_NOT_REGISTERED     = 707,

    /**
     * This error indicates that ::cuCtxCreate was called with the flag
     * ::CU_CTX_PRIMARY on a device which already has initialized its
     * primary context.
     */
    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE         = 708,

    /**
     * This error indicates that the context current to the calling thread
     * has been destroyed using ::cuCtxDestroy, or is a primary context which
     * has not yet been initialized.
     */
    CUDA_ERROR_CONTEXT_IS_DESTROYED           = 709,

    /**
     * This indicates that an unknown internal error has occurred.
     */
    CUDA_ERROR_UNKNOWN                        = 999
} CUresult;

/**
 * Memory types
 */
typedef enum CUmemorytype_enum
{
    CU_MEMORYTYPE_HOST    = 0x01, /**< Host memory */
    CU_MEMORYTYPE_DEVICE  = 0x02, /**< Device memory */
    CU_MEMORYTYPE_ARRAY   = 0x03  /**< Array memory */
#if __CUDA_API_VERSION >= 4000
  , CU_MEMORYTYPE_UNIFIED = 0x04  /**< Unified device or host memory */
#endif
} CUmemorytype;

/**
 * Compute Modes
 */
typedef enum CUcomputemode_enum
{
    CU_COMPUTEMODE_DEFAULT           = 0, /**< Default compute mode (Multiple contexts allowed per device) */
    CU_COMPUTEMODE_EXCLUSIVE         = 1, /**< Compute-exclusive-thread mode (Only one context used by a single thread can be present on this device at a time) */
    CU_COMPUTEMODE_PROHIBITED        = 2  /**< Compute-prohibited mode (No contexts can be created on this device at this time) */
#if __CUDA_API_VERSION >= 4000
  , CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3  /**< Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time) */
#endif
} CUcomputemode;

/**
 * Flags to register a graphics resource
 */
typedef enum CUgraphicsRegisterFlags_enum
{
    CU_GRAPHICS_REGISTER_FLAGS_NONE          = 0x00,
    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY     = 0x01,
    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02,
    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST  = 0x04
} CUgraphicsRegisterFlags;

/**
 * Flags for mapping and unmapping interop resources
 */
typedef enum CUgraphicsMapResourceFlags_enum
{
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0x00,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 0x01,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02
} CUgraphicsMapResourceFlags;

/**
 * 2D memory copy parameters
 *
 * The first group of fields describes the source, the second group the
 * destination, and the last two fields the extent of the copy. Each side
 * carries a ::CUmemorytype plus one host pointer, one device pointer and one
 * array handle; see the CUDA driver API documentation for which of them is
 * consulted for a given memory type.
 */
typedef struct CUDA_MEMCPY2D_st {
    size_t       srcXInBytes;   /**< Source X in bytes */
    size_t       srcY;          /**< Source Y */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void  *srcHost;       /**< Source host pointer */
    CUdeviceptr  srcDevice;     /**< Source device pointer */
    CUarray      srcArray;      /**< Source array reference */
    size_t       srcPitch;      /**< Source pitch */

    size_t       dstXInBytes;   /**< Destination X in bytes */
    size_t       dstY;          /**< Destination Y */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void        *dstHost;       /**< Destination host pointer */
    CUdeviceptr  dstDevice;     /**< Destination device pointer */
    CUarray      dstArray;      /**< Destination array reference */
    size_t       dstPitch;      /**< Destination pitch */

    size_t       WidthInBytes;  /**< Width of the 2D copy in bytes */
    size_t       Height;        /**< Height of the 2D copy */
} CUDA_MEMCPY2D;

#endif /* __cuda_cuda_h__ */