1 #ifndef DARKCUDA_H 2 #define DARKCUDA_H 3 #include "darknet.h" 4 5 #ifdef __cplusplus 6 extern "C" { 7 #endif 8 9 10 extern int cuda_debug_sync; 11 extern int gpu_index; 12 #ifdef __cplusplus 13 } 14 #endif // __cplusplus 15 16 #ifdef GPU 17 18 #define BLOCK 512 19 #define FULL_MASK 0xffffffff 20 #define WARP_SIZE 32 21 #define BLOCK_TRANSPOSE32 256 22 23 #include <cuda.h> 24 #include <cuda_runtime.h> 25 #include <curand.h> 26 #include <cublas_v2.h> 27 #include <cuda_runtime_api.h> 28 //#include <driver_types.h> 29 30 #ifdef CUDNN 31 #include <cudnn.h> 32 #endif // CUDNN 33 34 #ifndef __DATE__ 35 #define __DATE__ 36 #endif 37 38 #ifndef __TIME__ 39 #define __TIME__ 40 #endif 41 42 #ifndef __FUNCTION__ 43 #define __FUNCTION__ 44 #endif 45 46 #ifndef __LINE__ 47 #define __LINE__ 0 48 #endif 49 50 #ifndef __FILE__ 51 #define __FILE__ 52 #endif 53 54 #ifdef __cplusplus 55 extern "C" { 56 #endif // __cplusplus 57 void check_error(cudaError_t status); 58 void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time); 59 #define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); 60 61 cublasHandle_t blas_handle(); 62 void free_pinned_memory(); 63 void pre_allocate_pinned_memory(size_t size); 64 float *cuda_make_array_pinned_preallocated(float *x, size_t n); 65 float *cuda_make_array_pinned(float *x, size_t n); 66 float *cuda_make_array(float *x, size_t n); 67 void **cuda_make_array_pointers(void **x, size_t n); 68 int *cuda_make_int_array(size_t n); 69 int *cuda_make_int_array_new_api(int *x, size_t n); 70 void cuda_push_array(float *x_gpu, float *x, size_t n); 71 //LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n); 72 //LIB_API void cuda_set_device(int n); 73 int cuda_get_device(); 74 void cuda_free_host(float *x_cpu); 75 void cuda_free(float *x_gpu); 76 void cuda_random(float *x_gpu, size_t n); 77 float cuda_compare(float *x_gpu, float *x, size_t n, char *s); 78 dim3 cuda_gridsize(size_t n); 79 cudaStream_t get_cuda_stream(); 80 cudaStream_t get_cuda_memcpy_stream(); 81 int get_number_of_blocks(int array_size, int block_size); 82 int get_gpu_compute_capability(int i, char *device_name); 83 void show_cuda_cudnn_info(); 84 85 #ifdef CUDNN 86 cudnnHandle_t cudnn_handle(); 87 enum {cudnn_fastest, cudnn_smallest, cudnn_specify}; 88 89 void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time); 90 #define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__, __DATE__ " - " __TIME__ ); 91 #endif 92 93 #ifdef __cplusplus 94 } 95 #endif // __cplusplus 96 97 #else // GPU 98 //LIB_API void cuda_set_device(int n); 99 #endif // GPU 100 #endif // DARKCUDA_H 101