1 #section support_code_struct
2 
dnn_batchnorm_op(PyGpuArrayObject * inp,PyGpuArrayObject * scale,PyGpuArrayObject * bias,npy_float64 epsilon,npy_float64 running_average_factor,PyGpuArrayObject * in_running_mean,PyGpuArrayObject * in_running_var,PyGpuArrayObject ** outp,PyGpuArrayObject ** x_mean,PyGpuArrayObject ** x_invstd,PyGpuArrayObject ** out_running_mean,PyGpuArrayObject ** out_running_var,PARAMS_TYPE * params)3 int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
4                      PyGpuArrayObject *bias, npy_float64 epsilon,
5                      npy_float64 running_average_factor,
6                      PyGpuArrayObject *in_running_mean, // may be NULL
7                      PyGpuArrayObject *in_running_var, // may be NULL
8                      PyGpuArrayObject **outp,
9                      PyGpuArrayObject **x_mean,
10                      PyGpuArrayObject **x_invstd,
11                      PyGpuArrayObject **out_running_mean, // may be NULL
12                      PyGpuArrayObject **out_running_var, // may be NULL
13                      PARAMS_TYPE* params) {
14   /* Note: based on Python code, in_running_mean, in_running_var, out_running_mean and out_running_var
15   are together NULL (or not NULL) at same time, so we just need to check only one of them. */
16   bool running_averages = (in_running_mean != NULL);
17   PyGpuContextObject *c = inp->context;
18 
19   if (c_set_tensorNd(inp, bn_input) != 0)
20     return 1;
21   if (c_set_tensorNd(scale, bn_params) != 0)
22     return 1;
23 
24   if (epsilon < 1e-5) {
25     PyErr_Format(PyExc_ValueError, "epsilon must be at least 1e-5, got %f", epsilon);
26     return 1;
27   }
28 
29   if (params->inplace_output) {
30     Py_XDECREF(*outp);
31     *outp = inp;
32     Py_INCREF(*outp);
33   } else if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) {
34     return 1;
35   }
36 
37   if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
38     return 1;
39   if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
40     return 1;
41 
42   if (c_set_tensorNd(*outp, bn_output) != 0)
43     return 1;
44 
45   PyGpuArrayObject *running_mean = NULL;
46   PyGpuArrayObject *running_var = NULL;
47   if (running_averages) {
48     if (params->inplace_running_mean) {
49       Py_XDECREF(*out_running_mean);
50       running_mean = in_running_mean;
51       Py_INCREF(running_mean);
52     } else {
53       running_mean = *out_running_mean;
54       running_mean = theano_try_copy(running_mean, in_running_mean);
55       if (running_mean == NULL) {
56         return 1;
57       }
58     }
59     if (params->inplace_running_var) {
60       Py_XDECREF(*out_running_var);
61       running_var = in_running_var;
62       Py_INCREF(running_var);
63     } else {
64       running_var = *out_running_var;
65       running_var = theano_try_copy(running_var, in_running_var);
66       if (running_var == NULL) {
67         return 1;
68       }
69     }
70   }
71 
72   {
73     const float falpha = 1.;
74     const float fbeta = 0.;
75     const double dalpha = 1.;
76     const double dbeta = 0.;
77     void *alpha;
78     void *beta;
79     if (inp->ga.typecode == GA_DOUBLE) {
80       alpha = (void *)&dalpha;
81       beta = (void *)&dbeta;
82     } else {
83       alpha = (void *)&falpha;
84       beta = (void *)&fbeta;
85     }
86     cudnnStatus_t err = cudnnBatchNormalizationForwardTraining(
87       params->handle,
88       params->mode,
89       alpha,
90       beta,
91       bn_input,
92       PyGpuArray_DEV_DATA(inp),
93       bn_output,
94       PyGpuArray_DEV_DATA(*outp),
95       bn_params,
96       PyGpuArray_DEV_DATA(scale),
97       PyGpuArray_DEV_DATA(bias),
98       running_averages ? running_average_factor : 0,
99       running_averages ? PyGpuArray_DEV_DATA(running_mean) : NULL,
100       running_averages ? PyGpuArray_DEV_DATA(running_var): NULL,
101       epsilon,
102       PyGpuArray_DEV_DATA(*x_mean),
103       PyGpuArray_DEV_DATA(*x_invstd)
104       );
105     if (err != CUDNN_STATUS_SUCCESS) {
106       PyErr_Format(PyExc_RuntimeError, "Error during batchnorm: %s\n",
107                    cudnnGetErrorString(err));
108       return 1;
109     }
110     if (running_averages) {
111       *out_running_mean = running_mean;
112       *out_running_var = running_var;
113     }
114   }
115   return 0;
116 }
117