1 #ifndef DARKCUDA_H
2 #define DARKCUDA_H
3 #include "darknet.h"
4 
5 #ifdef __cplusplus
6 extern "C" {
7 #endif
8 
9 
10 extern int cuda_debug_sync;
11 extern int gpu_index;
12 #ifdef __cplusplus
13 }
14 #endif // __cplusplus
15 
16 #ifdef GPU
17 
18 #define BLOCK 512
19 #define FULL_MASK 0xffffffff
20 #define WARP_SIZE 32
21 #define BLOCK_TRANSPOSE32 256
22 
23 #include <cuda.h>
24 #include <cuda_runtime.h>
25 #include <curand.h>
26 #include <cublas_v2.h>
27 #include <cuda_runtime_api.h>
28 //#include <driver_types.h>
29 
30 #ifdef CUDNN
31 #include <cudnn.h>
32 #endif // CUDNN
33 
34 #ifndef __DATE__
35 #define __DATE__
36 #endif
37 
38 #ifndef __TIME__
39 #define __TIME__
40 #endif
41 
42 #ifndef __FUNCTION__
43 #define __FUNCTION__
44 #endif
45 
46 #ifndef __LINE__
47 #define __LINE__ 0
48 #endif
49 
50 #ifndef __FILE__
51 #define __FILE__
52 #endif
53 
54 #ifdef __cplusplus
55 extern "C" {
56 #endif // __cplusplus
57     void check_error(cudaError_t status);
58     void check_error_extended(cudaError_t status, const char *file, int line, const char *date_time);
59 #define CHECK_CUDA(X) check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__,  __DATE__ " - " __TIME__ );
60 
61     cublasHandle_t blas_handle();
62     void free_pinned_memory();
63     void pre_allocate_pinned_memory(size_t size);
64     float *cuda_make_array_pinned_preallocated(float *x, size_t n);
65     float *cuda_make_array_pinned(float *x, size_t n);
66     float *cuda_make_array(float *x, size_t n);
67     void **cuda_make_array_pointers(void **x, size_t n);
68     int *cuda_make_int_array(size_t n);
69 	int *cuda_make_int_array_new_api(int *x, size_t n);
70     void cuda_push_array(float *x_gpu, float *x, size_t n);
71     //LIB_API void cuda_pull_array(float *x_gpu, float *x, size_t n);
72     //LIB_API void cuda_set_device(int n);
73     int cuda_get_device();
74     void cuda_free_host(float *x_cpu);
75     void cuda_free(float *x_gpu);
76     void cuda_random(float *x_gpu, size_t n);
77     float cuda_compare(float *x_gpu, float *x, size_t n, char *s);
78     dim3 cuda_gridsize(size_t n);
79     cudaStream_t get_cuda_stream();
80     cudaStream_t get_cuda_memcpy_stream();
81     int get_number_of_blocks(int array_size, int block_size);
82     int get_gpu_compute_capability(int i, char *device_name);
83     void show_cuda_cudnn_info();
84 
85 #ifdef CUDNN
86 cudnnHandle_t cudnn_handle();
87 enum {cudnn_fastest, cudnn_smallest, cudnn_specify};
88 
89 void cudnn_check_error_extended(cudnnStatus_t status, const char *file, int line, const char *date_time);
90 #define CHECK_CUDNN(X) cudnn_check_error_extended(X, __FILE__ " : " __FUNCTION__, __LINE__,  __DATE__ " - " __TIME__ );
91 #endif
92 
93 #ifdef __cplusplus
94 }
95 #endif // __cplusplus
96 
97 #else // GPU
98 //LIB_API void cuda_set_device(int n);
99 #endif // GPU
100 #endif // DARKCUDA_H
101