1 #section support_code_struct
2
dnn_batchnorm_op(PyGpuArrayObject * inp,PyGpuArrayObject * scale,PyGpuArrayObject * bias,npy_float64 epsilon,npy_float64 running_average_factor,PyGpuArrayObject * in_running_mean,PyGpuArrayObject * in_running_var,PyGpuArrayObject ** outp,PyGpuArrayObject ** x_mean,PyGpuArrayObject ** x_invstd,PyGpuArrayObject ** out_running_mean,PyGpuArrayObject ** out_running_var,PARAMS_TYPE * params)3 int dnn_batchnorm_op(PyGpuArrayObject *inp, PyGpuArrayObject *scale,
4 PyGpuArrayObject *bias, npy_float64 epsilon,
5 npy_float64 running_average_factor,
6 PyGpuArrayObject *in_running_mean, // may be NULL
7 PyGpuArrayObject *in_running_var, // may be NULL
8 PyGpuArrayObject **outp,
9 PyGpuArrayObject **x_mean,
10 PyGpuArrayObject **x_invstd,
11 PyGpuArrayObject **out_running_mean, // may be NULL
12 PyGpuArrayObject **out_running_var, // may be NULL
13 PARAMS_TYPE* params) {
14 /* Note: based on Python code, in_running_mean, in_running_var, out_running_mean and out_running_var
15 are together NULL (or not NULL) at same time, so we just need to check only one of them. */
16 bool running_averages = (in_running_mean != NULL);
17 PyGpuContextObject *c = inp->context;
18
19 if (c_set_tensorNd(inp, bn_input) != 0)
20 return 1;
21 if (c_set_tensorNd(scale, bn_params) != 0)
22 return 1;
23
24 if (epsilon < 1e-5) {
25 PyErr_Format(PyExc_ValueError, "epsilon must be at least 1e-5, got %f", epsilon);
26 return 1;
27 }
28
29 if (params->inplace_output) {
30 Py_XDECREF(*outp);
31 *outp = inp;
32 Py_INCREF(*outp);
33 } else if (theano_prep_output(outp, inp->ga.nd, inp->ga.dimensions, inp->ga.typecode, GA_C_ORDER, c) != 0) {
34 return 1;
35 }
36
37 if (theano_prep_output(x_mean, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
38 return 1;
39 if (theano_prep_output(x_invstd, scale->ga.nd, scale->ga.dimensions, scale->ga.typecode, GA_C_ORDER, c) != 0)
40 return 1;
41
42 if (c_set_tensorNd(*outp, bn_output) != 0)
43 return 1;
44
45 PyGpuArrayObject *running_mean = NULL;
46 PyGpuArrayObject *running_var = NULL;
47 if (running_averages) {
48 if (params->inplace_running_mean) {
49 Py_XDECREF(*out_running_mean);
50 running_mean = in_running_mean;
51 Py_INCREF(running_mean);
52 } else {
53 running_mean = *out_running_mean;
54 running_mean = theano_try_copy(running_mean, in_running_mean);
55 if (running_mean == NULL) {
56 return 1;
57 }
58 }
59 if (params->inplace_running_var) {
60 Py_XDECREF(*out_running_var);
61 running_var = in_running_var;
62 Py_INCREF(running_var);
63 } else {
64 running_var = *out_running_var;
65 running_var = theano_try_copy(running_var, in_running_var);
66 if (running_var == NULL) {
67 return 1;
68 }
69 }
70 }
71
72 {
73 const float falpha = 1.;
74 const float fbeta = 0.;
75 const double dalpha = 1.;
76 const double dbeta = 0.;
77 void *alpha;
78 void *beta;
79 if (inp->ga.typecode == GA_DOUBLE) {
80 alpha = (void *)&dalpha;
81 beta = (void *)&dbeta;
82 } else {
83 alpha = (void *)&falpha;
84 beta = (void *)&fbeta;
85 }
86 cudnnStatus_t err = cudnnBatchNormalizationForwardTraining(
87 params->handle,
88 params->mode,
89 alpha,
90 beta,
91 bn_input,
92 PyGpuArray_DEV_DATA(inp),
93 bn_output,
94 PyGpuArray_DEV_DATA(*outp),
95 bn_params,
96 PyGpuArray_DEV_DATA(scale),
97 PyGpuArray_DEV_DATA(bias),
98 running_averages ? running_average_factor : 0,
99 running_averages ? PyGpuArray_DEV_DATA(running_mean) : NULL,
100 running_averages ? PyGpuArray_DEV_DATA(running_var): NULL,
101 epsilon,
102 PyGpuArray_DEV_DATA(*x_mean),
103 PyGpuArray_DEV_DATA(*x_invstd)
104 );
105 if (err != CUDNN_STATUS_SUCCESS) {
106 PyErr_Format(PyExc_RuntimeError, "Error during batchnorm: %s\n",
107 cudnnGetErrorString(err));
108 return 1;
109 }
110 if (running_averages) {
111 *out_running_mean = running_mean;
112 *out_running_var = running_var;
113 }
114 }
115 return 0;
116 }
117