1 #section support_code
2 
3 static int
c_set_tensor_for_conv(PyGpuArrayObject * var,cudnnTensorDescriptor_t desc,size_t groups)4 c_set_tensor_for_conv(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc, size_t groups) {
5   cudnnDataType_t dt;
6   size_t ds;
7   switch (var->ga.typecode) {
8   case GA_FLOAT:
9     dt = CUDNN_DATA_FLOAT;
10     break;
11   case GA_DOUBLE:
12     dt = CUDNN_DATA_DOUBLE;
13     break;
14   case GA_HALF:
15     dt = CUDNN_DATA_HALF;
16     break;
17   default:
18     PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensorNd");
19     return -1;
20   }
21   ds = gpuarray_get_elsize(var->ga.typecode);
22 
23   int strs[8], dims[8], default_stride = 1;
24   unsigned int nd = PyGpuArray_NDIM(var);
25 
26   if (nd > 8) {
27     PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
28     return -1;
29   }
30 
31   for (unsigned int _i = nd; _i > 0; _i--) {
32     unsigned int i = _i - 1;
33     strs[i] = (PyGpuArray_DIM(var, i) != 1 && PyGpuArray_STRIDE(var, i)) ?
34       PyGpuArray_STRIDE(var, i)/ds : default_stride;
35     default_stride *= PyGpuArray_DIM(var, i);
36     dims[i] = PyGpuArray_DIM(var, i);
37   }
38 
39   /* Tensors can't be smaller than 3d for cudnn so we pad the
40    * descriptor if they are */
41   for (unsigned int i = nd; i < 3; i++) {
42     strs[i] = 1;
43     dims[i] = 1;
44   }
45   //only for grouped convolution i.e when groups > 1
46   dims[1] = dims[1] / groups;
47   cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, dt, nd < 3 ? 3 : nd,
48                                                  dims, strs);
49   if (err != CUDNN_STATUS_SUCCESS) {
50     PyErr_Format(PyExc_RuntimeError,
51 		 "Could not set tensorNd descriptor: %s",
52 		 cudnnGetErrorString(err));
53     return -1;
54   }
55   return 0;
56 }
57 
58 static int
c_set_tensorNd(PyGpuArrayObject * var,cudnnTensorDescriptor_t desc)59 c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
60  return c_set_tensor_for_conv(var, desc, 1);
61 }
62 
/*
 * Create a cuDNN tensor descriptor in `*desc` and fill it from `var`
 * through c_set_tensorNd.
 *
 * Returns 0 on success and -1 on failure.  When the creation itself fails
 * a Python RuntimeError is set.  When filling the descriptor fails, the
 * freshly created descriptor is destroyed before returning.
 */
static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc) {
  cudnnStatus_t status = cudnnCreateTensorDescriptor(desc);

  if (status == CUDNN_STATUS_SUCCESS) {
    if (c_set_tensorNd(var, *desc) == 0) {
      return 0;
    }
    /* Do not leave a half-initialised descriptor behind. */
    cudnnDestroyTensorDescriptor(*desc);
    return -1;
  }

  PyErr_Format(PyExc_RuntimeError,
               "Could not create tensor descriptor: %s",
               cudnnGetErrorString(status));
  return -1;
}
78 
79 static int
c_set_filter(PyGpuArrayObject * var,cudnnFilterDescriptor_t desc,size_t groups)80 c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc, size_t groups) {
81   cudnnDataType_t dt;
82   cudnnStatus_t err;
83 
84   if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
85     PyErr_SetString(PyExc_ValueError,
86 		    "Only contiguous filters (kernels) are supported.");
87     return -1;
88   }
89   switch (var->ga.typecode) {
90   case GA_FLOAT:
91     dt = CUDNN_DATA_FLOAT;
92     break;
93   case GA_DOUBLE:
94     dt = CUDNN_DATA_DOUBLE;
95     break;
96   case GA_HALF:
97     dt = CUDNN_DATA_HALF;
98     break;
99   default:
100     PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_filter");
101     return -1;
102   }
103 
104   int dims[8];
105   unsigned int nd = PyGpuArray_NDIM(var);
106 
107   if (nd > 8) {
108     PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
109     return -1;
110   }
111 
112   for (unsigned int _i = nd; _i > 0; _i--) {
113     unsigned int i = _i - 1;
114     dims[i] = PyGpuArray_DIM(var, i);
115   }
116 
117   /* Filters can't be less than 3d so we pad */
118   for (unsigned int i = nd; i < 3; i++)
119     dims[i] = 1;
120   dims[0] = dims[0] / groups;
121 
122   if (nd < 3)
123     nd = 3;
124 
125     err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
126 
127   if (err != CUDNN_STATUS_SUCCESS) {
128     PyErr_Format(PyExc_RuntimeError,
129 		 "Could not set filter descriptor: %s.",
130 		 cudnnGetErrorString(err));
131     return -1;
132   }
133   return 0;
134 }
135 
/*
 * Create a cuDNN filter descriptor in `*desc` and fill it from `var`
 * through c_set_filter with a group count of 1.
 *
 * Returns 0 on success and -1 on failure.  When the creation itself fails
 * a Python RuntimeError is set.  When filling the descriptor fails, the
 * freshly created descriptor is destroyed before returning.
 */
static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
  cudnnStatus_t err;
  err = cudnnCreateFilterDescriptor(desc);
  if (err != CUDNN_STATUS_SUCCESS) {
    /* This is the filter descriptor, not a tensor descriptor: name it
     * correctly so the failure can be traced to the right call. */
    PyErr_Format(PyExc_RuntimeError,
                 "Could not create filter descriptor: %s",
                 cudnnGetErrorString(err));
    return -1;
  }
  if (c_set_filter(var, *desc, 1) != 0) {
    /* Do not leave a half-initialised descriptor behind. */
    cudnnDestroyFilterDescriptor(*desc);
    return -1;
  }
  return 0;
}
151 
152 #section init_code
153 
154 setup_ext_cuda();
155