#section support_code_struct

/* Per-apply cuDNN descriptors: the input/output tensor layouts and the
   pooling operation configuration.  Allocated in init_code_struct below
   and released in cleanup_code_struct. */
cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
cudnnPoolingDescriptor_t APPLY_SPECIFIC(pool);
7
8 #section init_code_struct
9
10 cudnnStatus_t APPLY_SPECIFIC(err);
11 APPLY_SPECIFIC(input) = NULL;
12 APPLY_SPECIFIC(output) = NULL;
13 APPLY_SPECIFIC(pool) = NULL;
14
15 if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) != CUDNN_STATUS_SUCCESS) {
16 PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
17 "(inp): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
18 FAIL;
19 }
20 if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) != CUDNN_STATUS_SUCCESS) {
21 PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
22 "(out): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
23 FAIL;
24 }
25 if ((APPLY_SPECIFIC(err) = cudnnCreatePoolingDescriptor(&APPLY_SPECIFIC(pool))) != CUDNN_STATUS_SUCCESS) {
26 PyErr_Format(PyExc_MemoryError, "could not allocate pooling descriptor"
27 "(pool): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
28 FAIL;
29 }
30
#section cleanup_code_struct

/* Release any descriptors that were successfully created.  The NULL
   checks make this safe to run even when init_code_struct failed before
   allocating all three. */
if (APPLY_SPECIFIC(input) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input)); }
if (APPLY_SPECIFIC(output) != NULL) { cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output)); }
if (APPLY_SPECIFIC(pool) != NULL) { cudnnDestroyPoolingDescriptor(APPLY_SPECIFIC(pool)); }
37
38 #section support_code_struct
39
APPLY_SPECIFIC(dnn_pool)40 int APPLY_SPECIFIC(dnn_pool)(PyGpuArrayObject *img,
41 PyArrayObject *ws,
42 PyArrayObject *stride,
43 PyArrayObject *pad,
44 PyGpuArrayObject **out,
45 PARAMS_TYPE* params) {
46 PyGpuContextObject *c = img->context;
47 size_t dims[5];
48 cudnnStatus_t err;
49
50 if (!GpuArray_IS_C_CONTIGUOUS(&img->ga)) {
51 PyErr_SetString(PyExc_ValueError, "Only contiguous inputs are supported.");
52 return 1;
53 }
54
55 cudnnPoolingMode_t mode;
56 int w[3];
57 int p[3];
58 int s[3];
59 int ndims = PyArray_DIM(ws, 0);//PyGpuArray_NDIM(img) - 2;
60
61 for(int i = 0; i < ndims; i++) {
62 w[i] = *((npy_intp*)PyArray_GETPTR1(ws, i));
63 }
64 for(int i = 0; i < ndims; i++) {
65 p[i] = *((npy_intp*)PyArray_GETPTR1(pad, i));
66 }
67 for(int i = 0; i < ndims; i++) {
68 s[i] = *((npy_intp*)PyArray_GETPTR1(stride, i));
69 }
70
71 dims[0] = PyGpuArray_DIM(img, 0);
72 dims[1] = PyGpuArray_DIM(img, 1);
73 dims[2] = (PyGpuArray_DIM(img, 2) + (p[0]*2) - w[0]) / s[0] + 1;
74 dims[3] = (PyGpuArray_DIM(img, 3) + (p[1]*2) - w[1]) / s[1] + 1;
75 if (ndims == 3)
76 dims[4] = (PyGpuArray_DIM(img, 4) + (p[2]*2) - w[2]) / s[2] + 1;
77
78 if (theano_prep_output(out, ndims+2, dims, img->ga.typecode,
79 GA_C_ORDER, c) != 0)
80 return 1;
81
82 // if input batch is empty, we return the empty output without calling cuDNN
83 // (which will fail on zero batch size).
84 if (PyGpuArray_DIM(*out, 0) == 0)
85 return 0;
86
87 if (c_set_tensorNd(img, APPLY_SPECIFIC(input)) != 0)
88 return 1;
89
90 if (c_set_tensorNd(*out, APPLY_SPECIFIC(output)) != 0)
91 return 1;
92
93 err = cudnnSetPoolingNdDescriptor(APPLY_SPECIFIC(pool), params->mode, CUDNN_PROPAGATE_NAN, ndims, w, p, s);
94
95 if (err != CUDNN_STATUS_SUCCESS) {
96 PyErr_Format(PyExc_RuntimeError, "could not set op descriptor %s", cudnnGetErrorString(err));
97 return 1;
98 }
99
100 {
101 const float alphaf = 1;
102 const float betaf = 0;
103 const double alphad = 1;
104 const double betad = 0;
105 void *alpha, *beta;
106
107 switch (img->ga.typecode) {
108 case GA_DOUBLE:
109 alpha = (void *)&alphad;
110 beta = (void *)&betad;
111 break;
112 case GA_FLOAT:
113 case GA_HALF:
114 alpha = (void *)&alphaf;
115 beta = (void *)&betaf;
116 break;
117 default:
118 PyErr_SetString(PyExc_TypeError, "Unsupported type in pooling");
119 return 1;
120 }
121
122 cuda_enter(c->ctx);
123
124 cuda_wait(img->ga.data, GPUARRAY_CUDA_WAIT_READ);
125 cuda_wait((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
126
127 err = cudnnPoolingForward(
128 params->handle, APPLY_SPECIFIC(pool),
129 alpha,
130 APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(img),
131 beta,
132 APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*out));
133
134 cuda_record(img->ga.data, GPUARRAY_CUDA_WAIT_READ);
135 cuda_record((*out)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
136
137 cuda_exit(c->ctx);
138 }
139 if (err != CUDNN_STATUS_SUCCESS) {
140 PyErr_Format(PyExc_RuntimeError,
141 "GpuDnnPool: error doing cudnnPoolingForward operation: %s",
142 cudnnGetErrorString(err));
143 return 1;
144 }
145 return 0;
146 }
147