/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved.                      *
* This file is part of the LIBXSMM library.                                   *
*                                                                             *
* For information on the license, see the LICENSE file.                       *
* Further information: https://github.com/hfp/libxsmm/                        *
* SPDX-License-Identifier: BSD-3-Clause                                       *
******************************************************************************/
/* Sasikanth Avancha, Dhiraj Kalamkar (Intel Corp.)
******************************************************************************/


#include <string>
#include <stdlib.h> /* rand_r() */
#include "Dropout.hpp"
#include "fillers.hpp"

#ifdef _OPENMP
#include <omp.h>
#else
/* Fallback so omp_get_thread_num() in forwardPropagate() compiles without
   OpenMP; assumes Dropout.hpp does not already provide an equivalent stub */
static inline int omp_get_thread_num() { return 0; }
#endif

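/* Base value for the per-thread rand_r() seeds set up in the constructor:
   thread i is seeded with PRIME_SEED + i, so each thread starts from a
   distinct, reproducible state */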
#define PRIME_SEED 131

using namespace std;
using namespace gxm;

DropoutNode::DropoutNode(DropoutParams* p, MLEngine* e): NNNode(p, e)
{
  nname_ = p->get_node_name();
  ntype_ = p->get_node_type();
  mode_ = p->get_mode();
  bottom_ = p->get_bottom_names();
  top_ = p->get_top_names();
  bp_flag_ = p->get_bprop_flag();
  has_weights_ = false;

  assert((bottom_.size() == 1) && (top_.size() == 1));

  tenTop_ = new Tensor(top_[0]);
  assert(tenTop_ != NULL);
  tenTop_->setOwner(this);
  tenTop_->setType(ACT);
  tenTopData_ = tenTop_->getBuf(DATA);
  tenTopData_->setBufferType(DATA);

#ifdef DEBUG
  printf("bottom name %s\n",bottom_[0].c_str());
#endif

  tenBot_ = e->get_tensor(bottom_[0], ACT);
  assert(tenBot_ != NULL);
  setPrevNode((NNNode*)tenBot_->getOwner());
  tenBotData_ = tenBot_->getBuf(DATA);

  // Output tensor data type = input tensor data type
  int dtype = p->get_data_type();
  tenTopData_->setDataType(dtype);

  // Get input tensor shape (bottom)
  Shape* bs = tenBot_->getShape();
  assert(bs->ndims <= MAX_DIMS);

  Shape ts;
  shape_setzero(&ts);

  ts.ndims = bs->ndims;
  for(int i=0; i < bs->ndims; i++)
    ts.dims[i] = bs->dims[i];

  tenTop_->setShape(&ts);

  long long int tsize = 1;
  for(int i=0; i<ts.ndims; i++)
    tsize = tsize*ts.dims[i];

  // Mask to select neuron activations to be dropped out
  tenMask_ = new int[tsize];

  if(dtype == DT_FLOAT)
    tsize = tsize*sizeof(float);
  else if(dtype == DT_INT16)
    tsize = tsize*sizeof(short int);

  // Set the logical size of the tensor buffer for bufId=0 (forward data buffer).
  // Note: we have no knowledge of the machine parameters here, so effectively this is single-machine config
  tenTopData_->setBufferSize(tsize);

  if(!e->is_inference_only())
  {
    if(bp_flag_)
    {
      tenBotDiff_ = tenBot_->addBuf(); // DIFF type and index
      tenBotDiff_->setDataType(dtype);
      tenBotDiff_->setBufferType(DIFF);

      long long int bsize = 1;
      for(int i=0; i<bs->ndims; i++)
        bsize = bsize*bs->dims[i];
      if(dtype == DT_FLOAT)
        bsize = bsize*sizeof(float);
      else if(dtype == DT_INT16)
        bsize = bsize*sizeof(short int);

      // Set the size of the input-gradient buffer
      tenBotDiff_->setBufferSize(bsize);
    }
  }
  else
    tenBotDiff_ = NULL;

  // Compute scale via dropout_ratio
  threshold_ = p->get_dropout_ratio();
  if(threshold_ != 0.5)
  {
    printf("Support for threshold %f not implemented! Resetting to 0.5\n",threshold_);
    threshold_ = 0.5;
  }
  scale_ = 1./(1 - threshold_);
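  /* Illustrative check of the inverted-dropout scaling: with threshold_ = 0.5
   * an activation survives with probability 1 - 0.5 = 0.5 and is multiplied by
   * scale_ = 1/(1 - 0.5) = 2, so E[top[i]] = 0.5*0 + 0.5*2*bot[i] = bot[i];
   * the expected activation is unchanged and inference needs no rescaling */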

  // Register output tensor in tensor map
  bool inserted = e->register_tensor(top_[0], ACT, tenTop_);
  if(!inserted)
    printf("Warning: Tensor %s already registered\n",NNNode::top_[0].c_str());

  // Bottom/top shapes are assumed to be 4D NCHW here
  gparams_.batch_size = bs->dims[0];
  gparams_.nInput = bs->dims[1];
  gparams_.nOutput = gparams_.nInput;
  gparams_.iHeight = bs->dims[2];
  gparams_.iWidth = bs->dims[3];
  gparams_.oHeight = ts.dims[2];
  gparams_.oWidth = ts.dims[3];
  gparams_.data_type = dtype;

  gparams_.num_threads = e->get_num_threads();

  // Private rand_r() state per OpenMP thread: distinct seeds give each
  // thread its own reproducible random stream in forwardPropagate()
  seeds = new unsigned int[gparams_.num_threads];
  for(int i=0; i<gparams_.num_threads; i++)
    seeds[i] = PRIME_SEED + i;

  eptr_ = e;
}
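
/* Usage sketch (illustrative only, not part of the build): in GxM the engine
 * instantiates nodes like this one from the parsed topology, roughly:
 *
 *   DropoutParams* p = ...;                  // node name, bottom/top, dropout_ratio
 *   DropoutNode* drop = new DropoutNode(p, engine);
 *   drop->forwardPropagate();                // driven by the engine's schedule
 *
 * The exact driver calls are assumptions; only the constructor and the
 * forward/backward entry points below are defined in this file. */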

void DropoutNode::forwardPropagate()
{
#ifdef RETURNALL
  return;
#endif

  float* bot = (float*)(tenBotData_->getBuffer());
  float* top = (float*)(tenTopData_->getBuffer());
  int *mask = (int *)tenMask_;

#ifdef DEBUG
  printf("Executing FP %s: input %p, output %p\n",NNNode::nname_.c_str(), bot, top);
  printf("Inputs: %d\n",gparams_.nInput);
  printf("Outputs: %d\n",gparams_.nOutput);
#endif

  int M = gparams_.batch_size;
  int N = gparams_.nOutput;
  int H = gparams_.oHeight;
  int W = gparams_.oWidth;

  if(eptr_->get_execution_mode() == TRAIN)
  {
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for (int i = 0; i < M*N*H*W; i++)
    {
      // Draw from this thread's private stream; r%2 drops each activation
      // with probability 0.5, matching the (only supported) threshold
      int r = rand_r(&seeds[omp_get_thread_num()]);
      if(r%2 == 0)
      {
        // Record the decision so backPropagate() can replay it
        mask[i] = 0;
        top[i] = 0;
      }
      else
      {
        mask[i] = 1;
        top[i] = bot[i] * scale_;
      }
    }
  }
  else
  {
#ifdef _OPENMP
#pragma omp parallel for
#endif
    for (int i = 0; i < M*N*H*W; i++)
      top[i] = bot[i];
  }

#ifdef DEBUG
  MeanOfLayer((char*)bottom_[0].c_str(), bot, M*N*H*W);
  MeanOfLayer((char*)top_[0].c_str(), top, M*N*H*W);
#endif
}
189 
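/* Backward pass: the forward pass computed top[i] = mask[i] * scale_ * bot[i]
 * with the mask fixed for this step, so by the chain rule the input gradient
 * is gbot[i] = mask[i] * scale_ * gtop[i] */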
void DropoutNode::backPropagate()
{
#ifdef RETURNALL
  return;
#endif

  int M = gparams_.batch_size;
  int N = gparams_.nOutput;
  int H = gparams_.oHeight;
  int W = gparams_.oWidth;

  TensorBuf *tenTopDiff = tenTop_->getBuf(DIFF);
  float *gtop = (float*)(tenTopDiff->getBuffer());
  assert(gtop != NULL);

  float* gbot = (float*)(tenBotDiff_->getBuffer());

  int *mask = (int *)tenMask_;

#ifdef DEBUG
  printf("Executing BP %s: grad_output %p, grad_input %p\n",NNNode::nname_.c_str(), gtop, gbot);
  printf("Grad Outputs: %d\n", N*H*W);
  printf("Grad Inputs: %d\n", N*H*W);
#endif

  assert(eptr_->get_execution_mode() == TRAIN);
#ifdef _OPENMP
#pragma omp parallel for
#endif
  for (int i = 0; i < M*N*H*W; i++)
    gbot[i] = gtop[i] * mask[i] * scale_;

#ifdef DEBUG
  MeanOfLayer((char*)top_[0].c_str(), gtop, M*N*H*W);
  MeanOfLayer((char*)bottom_[0].c_str(), gbot, M*N*H*W);
#endif
}