1 #section support_code
2
3 static int
c_set_tensor_for_conv(PyGpuArrayObject * var,cudnnTensorDescriptor_t desc,size_t groups)4 c_set_tensor_for_conv(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc, size_t groups) {
5 cudnnDataType_t dt;
6 size_t ds;
7 switch (var->ga.typecode) {
8 case GA_FLOAT:
9 dt = CUDNN_DATA_FLOAT;
10 break;
11 case GA_DOUBLE:
12 dt = CUDNN_DATA_DOUBLE;
13 break;
14 case GA_HALF:
15 dt = CUDNN_DATA_HALF;
16 break;
17 default:
18 PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensorNd");
19 return -1;
20 }
21 ds = gpuarray_get_elsize(var->ga.typecode);
22
23 int strs[8], dims[8], default_stride = 1;
24 unsigned int nd = PyGpuArray_NDIM(var);
25
26 if (nd > 8) {
27 PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
28 return -1;
29 }
30
31 for (unsigned int _i = nd; _i > 0; _i--) {
32 unsigned int i = _i - 1;
33 strs[i] = (PyGpuArray_DIM(var, i) != 1 && PyGpuArray_STRIDE(var, i)) ?
34 PyGpuArray_STRIDE(var, i)/ds : default_stride;
35 default_stride *= PyGpuArray_DIM(var, i);
36 dims[i] = PyGpuArray_DIM(var, i);
37 }
38
39 /* Tensors can't be smaller than 3d for cudnn so we pad the
40 * descriptor if they are */
41 for (unsigned int i = nd; i < 3; i++) {
42 strs[i] = 1;
43 dims[i] = 1;
44 }
45 //only for grouped convolution i.e when groups > 1
46 dims[1] = dims[1] / groups;
47 cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, dt, nd < 3 ? 3 : nd,
48 dims, strs);
49 if (err != CUDNN_STATUS_SUCCESS) {
50 PyErr_Format(PyExc_RuntimeError,
51 "Could not set tensorNd descriptor: %s",
52 cudnnGetErrorString(err));
53 return -1;
54 }
55 return 0;
56 }
57
58 static int
c_set_tensorNd(PyGpuArrayObject * var,cudnnTensorDescriptor_t desc)59 c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
60 return c_set_tensor_for_conv(var, desc, 1);
61 }
62
/*
 * Create a cuDNN tensor descriptor in `*desc` and configure it from `var`
 * through c_set_tensorNd().
 *
 * Returns 0 on success.  Returns -1 if the descriptor could not be created
 * (a RuntimeError is set here) or could not be configured; in the latter
 * case the freshly created descriptor is destroyed before returning.
 */
static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc) {
  cudnnStatus_t status = cudnnCreateTensorDescriptor(desc);

  if (status == CUDNN_STATUS_SUCCESS) {
    if (c_set_tensorNd(var, *desc) == 0)
      return 0;

    /* Configuration failed: release the descriptor created above. */
    cudnnDestroyTensorDescriptor(*desc);
    return -1;
  }

  PyErr_Format(PyExc_RuntimeError,
               "Could not create tensor descriptor: %s",
               cudnnGetErrorString(status));
  return -1;
}
78
79 static int
c_set_filter(PyGpuArrayObject * var,cudnnFilterDescriptor_t desc,size_t groups)80 c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc, size_t groups) {
81 cudnnDataType_t dt;
82 cudnnStatus_t err;
83
84 if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
85 PyErr_SetString(PyExc_ValueError,
86 "Only contiguous filters (kernels) are supported.");
87 return -1;
88 }
89 switch (var->ga.typecode) {
90 case GA_FLOAT:
91 dt = CUDNN_DATA_FLOAT;
92 break;
93 case GA_DOUBLE:
94 dt = CUDNN_DATA_DOUBLE;
95 break;
96 case GA_HALF:
97 dt = CUDNN_DATA_HALF;
98 break;
99 default:
100 PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_filter");
101 return -1;
102 }
103
104 int dims[8];
105 unsigned int nd = PyGpuArray_NDIM(var);
106
107 if (nd > 8) {
108 PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
109 return -1;
110 }
111
112 for (unsigned int _i = nd; _i > 0; _i--) {
113 unsigned int i = _i - 1;
114 dims[i] = PyGpuArray_DIM(var, i);
115 }
116
117 /* Filters can't be less than 3d so we pad */
118 for (unsigned int i = nd; i < 3; i++)
119 dims[i] = 1;
120 dims[0] = dims[0] / groups;
121
122 if (nd < 3)
123 nd = 3;
124
125 err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
126
127 if (err != CUDNN_STATUS_SUCCESS) {
128 PyErr_Format(PyExc_RuntimeError,
129 "Could not set filter descriptor: %s.",
130 cudnnGetErrorString(err));
131 return -1;
132 }
133 return 0;
134 }
135
/*
 * Create a cuDNN filter descriptor in `*desc` and configure it from `var`
 * through c_set_filter() with a group count of 1.
 *
 * Returns 0 on success.  Returns -1 if the descriptor could not be created
 * (a RuntimeError is set here) or could not be configured; in the latter
 * case the freshly created descriptor is destroyed before returning.
 */
static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
  cudnnStatus_t err;
  err = cudnnCreateFilterDescriptor(desc);
  if (err != CUDNN_STATUS_SUCCESS) {
    /* This is the filter descriptor, not a tensor descriptor: say so in
     * the message so the failure can be traced to the right call. */
    PyErr_Format(PyExc_RuntimeError,
                 "Could not create filter descriptor: %s",
                 cudnnGetErrorString(err));
    return -1;
  }
  if (c_set_filter(var, *desc, 1) != 0) {
    /* Configuration failed: release the descriptor created above. */
    cudnnDestroyFilterDescriptor(*desc);
    return -1;
  }
  return 0;
}
151
152 #section init_code
153
/* Statement placed in the init_code section.  setup_ext_cuda() is defined
 * outside this file; presumably it prepares the CUDA extension bindings
 * before the support code above is used -- TODO confirm. */
setup_ext_cuda();
155