#section init_code_struct

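/* Runs once when the op's struct is set up: allocate the cuDNN tensor
   descriptor for the gradient of the output (dy). On failure, raise a
   Python exception and bail out through the section's FAIL macro. */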
{
  cudnnStatus_t err;

  bn_doutput = NULL;
  if ((err = cudnnCreateTensorDescriptor(&bn_doutput)) != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
                 "(bn_doutput): %s", cudnnGetErrorString(err));
    FAIL;
  }
}

#section cleanup_code_struct

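/* Runs when the op's struct is freed; the NULL check guards against
   init_code_struct having failed before the descriptor was created. */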
if (bn_doutput != NULL)
  cudnnDestroyTensorDescriptor(bn_doutput);

#section support_code_struct

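/* Struct-level descriptor reused across calls. The other descriptors used
   below (bn_input, bn_output, bn_params) are presumably declared in a
   shared base section for the batchnorm ops; only the descriptor specific
   to the backward pass is declared here. */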
cudnnTensorDescriptor_t bn_doutput;

int dnn_batchnorm_grad(PyGpuArrayObject *inp, PyGpuArrayObject *doutp,
                       PyGpuArrayObject *scale, PyGpuArrayObject *x_mean,
                       PyGpuArrayObject *x_invstd, npy_float64 epsilon,
                       PyGpuArrayObject **dinp, PyGpuArrayObject **dscale,
                       PyGpuArrayObject **dbias, PARAMS_TYPE *params) {
  PyGpuContextObject *c = inp->context;

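  /* Refresh the cuDNN descriptors from the current array shapes and
     strides; on failure c_set_tensorNd is expected to leave a Python
     exception set and return non-zero. */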
  if (c_set_tensorNd(inp, bn_input) != 0)
    return 1;
  if (c_set_tensorNd(doutp, bn_doutput) != 0)
    return 1;
  if (c_set_tensorNd(scale, bn_params) != 0)
    return 1;

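  /* cuDNN rejects epsilon below CUDNN_BN_MIN_EPSILON (1e-5), so fail early
     with a clearer Python-level error instead of a cuDNN status code. */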
  if (epsilon < 1e-5) {
    PyErr_Format(PyExc_ValueError, "epsilon must be at least 1e-5, got %f", epsilon);
    return 1;
  }

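  /* Allocate the three gradient outputs: d_input matches the input's shape
     and dtype, while d_scale and d_bias both take the scale tensor's shape. */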
  if (theano_prep_output(dinp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;
  if (theano_prep_output(dscale, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;
  if (theano_prep_output(dbias, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
    return 1;

  if (c_set_tensorNd(*dinp, bn_output) != 0)
    return 1;

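  /* cuDNN takes the alpha/beta blending factors as host pointers whose
     type must match the tensor data type: double for double tensors,
     float otherwise. alpha = 1 and beta = 0 make the call overwrite the
     outputs rather than accumulate into them. */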
  {
    const float falpha = 1.;
    const float fbeta = 0.;
    const double dalpha = 1.;
    const double dbeta = 0.;
    void *alphaData;
    void *betaData;
    void *alphaParam;
    void *betaParam;
    if (inp->ga.typecode == GA_DOUBLE) {
      alphaData = (void *)&dalpha;
      betaData = (void *)&dbeta;
      alphaParam = (void *)&dalpha;
      betaParam = (void *)&dbeta;
    } else {
      alphaData = (void *)&falpha;
      betaData = (void *)&fbeta;
      alphaParam = (void *)&falpha;
      betaParam = (void *)&fbeta;
    }
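    /* One fused call computes all three gradients. Passing the mean and
       inverse standard deviation saved by the forward pass (x_mean,
       x_invstd) lets cuDNN skip recomputing the batch statistics. In
       batchnorm terms this evaluates, per normalized feature,
         dbias  = sum(dy)
         dscale = sum(dy * (x - mean) * invstd)
       plus the corresponding d_input; see the cuDNN documentation of
       cudnnBatchNormalizationBackward for the exact formulation. */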
    cudnnStatus_t err = cudnnBatchNormalizationBackward(
      params->handle,
      params->mode,
      alphaData,
      betaData,
      alphaParam,
      betaParam,
      bn_input,
      PyGpuArray_DEV_DATA(inp),
      bn_doutput,
      PyGpuArray_DEV_DATA(doutp),
      bn_output,
      PyGpuArray_DEV_DATA(*dinp),
      bn_params,
      PyGpuArray_DEV_DATA(scale),
      PyGpuArray_DEV_DATA(*dscale),
      PyGpuArray_DEV_DATA(*dbias),
      epsilon,
      PyGpuArray_DEV_DATA(x_mean),
      PyGpuArray_DEV_DATA(x_invstd)
      );
    if (err != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_RuntimeError, "Error during batchnorm backward: %s",
                   cudnnGetErrorString(err));
      return 1;
    }
  }
  return 0;
}
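
/* A minimal sketch of how the op's generated c_code might invoke the
   helper above; the variable names (input, dy, dx, d_scale, d_bias) are
   illustrative assumptions, not taken from this file:

     if (dnn_batchnorm_grad(input, dy, scale, x_mean, x_invstd, epsilon,
                            &dx, &d_scale, &d_bias, params) != 0) {
       FAIL;  // the helper has already set the Python exception
     }
*/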