1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved.                      *
3 * This file is part of the LIBXSMM library.                                   *
4 *                                                                             *
5 * For information on the license, see the LICENSE file.                       *
6 * Further information: https://github.com/hfp/libxsmm/                        *
7 * SPDX-License-Identifier: BSD-3-Clause                                       *
8 ******************************************************************************/
9 /* Sasikanth Avancha, Dhiraj Kalamkar (Intel Corp.)
10 ******************************************************************************/
11 
12 
13 #include <string>
14 #include "ReLU.hpp"
15 
16 using namespace std;
17 using namespace gxm;
18 
ReLUNode(ReLUParams * p,MLEngine * e)19 ReLUNode::ReLUNode(ReLUParams* p, MLEngine* e): NNNode(p, e)
20 {
21   nname_ = p->get_node_name();
22   ntype_ = p->get_node_type();
23   mode_ = p->get_mode();
24   bottom_ = p->get_bottom_names();
25   top_ = p->get_top_names();
26   bp_flag_ = p->get_bprop_flag();
27   has_weights_ = false;
28   bot_compute_engine_ = p->get_compute_engine();
29 
30   assert((bottom_.size() == 1) && (top_.size() == 1));
31 
32   tenTop_ = new Tensor(top_[0]);
33   assert(tenTop_ != NULL);
34   tenTop_->setOwner(this);
35   tenTop_->setType(ACT);
36   tenTopData_ = tenTop_->getBuf(DATA);
37   tenTopData_->setBufferType(DATA);
38 
39 #ifdef DEBUG
40   printf("bottom name %s\n",bottom_[0].c_str());
41 #endif
42 
43   tenBot_ = e->get_tensor(bottom_[0], ACT);
44   assert(tenBot_ != NULL);
45   NNNode *pnn = (NNNode*)tenBot_->getOwner();
46   setPrevNode(pnn);
47   pnn->set_top_compute_engine(p->get_compute_engine());
48   bot_cengine_ = pnn->get_bot_compute_engine();
49 
50   tenBotData_ = tenBot_->getBuf(DATA);
51 
52   //Output tensor data type = input tensor data type
53   int dtype = p->get_data_type();
54   tenTopData_->setDataType(dtype);
55 
56   // Get input tensor shape (bottom)
57   Shape* bs = tenBot_->getShape();
58   assert(bs->ndims <= MAX_DIMS);
59 
60   tenTop_->setShape(bs);
61 
62   long long int tsize = 1;
63   for(int i=0; i<bs->ndims; i++)
64     tsize = tsize*bs->dims[i];
65 
66   if(dtype == DT_FLOAT)
67     tsize = tsize*sizeof(float);
68   else if(dtype == DT_INT)
69     tsize = tsize*sizeof(int);
70 
71   // Set the logical size of the tensor buffer for bufId=0 (forward data buffer).
72   // Note: we have no knowledge of the machine parameters here, so effectively this is single-machine config
73   tenTopData_->setBufferSize(tsize);
74 
75   if(!e->is_inference_only())
76   {
77     if(bp_flag_)
78     {
79       tenBotDiff_ = tenBot_->addBuf(); // DIFF type and index
80       tenBotDiff_->setDataType(dtype);
81       tenBotDiff_->setBufferType(DIFF);
82 
83       long long int bsize = 1;
84       for(int i=0; i<bs->ndims; i++)
85         bsize = bsize*bs->dims[i];
86       if(dtype == DT_FLOAT)
87         bsize = bsize*sizeof(float);
88       else if(dtype == DT_INT)
89         bsize = bsize*sizeof(int);
90 
91       // Set the size of the input-gradient buffer
92       tenBotDiff_->setBufferSize(bsize);
93     }
94   }
95   else
96     tenBotDiff_ = NULL;
97 
98   // Register output tensor in tensor map
99   bool inserted = e->register_tensor(top_[0], ACT, tenTop_);
100   if(!inserted)
101     printf("Warning: Tensor %s already registered\n",top_[0].c_str());
102 
103   gparams_.bdims = gparams_.tdims = bs->ndims;
104   gparams_.batch_size = bs->dims[0];
105   gparams_.node_name = nname_;
106   gparams_.nInput = bs->dims[1];
107   gparams_.nOutput = gparams_.nInput;
108   if(bs->ndims == 5)
109   {
110     gparams_.iDepth = gparams_.iHeight = gparams_.iWidth = bs->dims[2];
111     gparams_.oDepth = gparams_.oHeight = gparams_.oWidth = bs->dims[3];
112   }
113   else if(bs->ndims == 4)
114   {
115     gparams_.iDepth = gparams_.oDepth = 0;
116     gparams_.iHeight = gparams_.oHeight = bs->dims[2];
117     gparams_.iWidth = gparams_.oWidth = bs->dims[3];
118   }
119 
120   gparams_.negative_slope = p->get_negative_slope();
121 
122   gparams_.data_type = dtype;
123   gparams_.algType = p->get_algo_type();
124   gparams_.num_threads = e->get_num_threads();
125 
126   configure(p->get_compute_engine());
127 
128   eptr_ = e;
129 };
130 
configure(int engine)131 void ReLUNode::configure(int engine)
132 {
133   switch(engine)
134   {
135     case XSMM:
136       impl = new ReLUXSMM(&gparams_, engine);
137       break;
138   }
139 }
140 
forwardPropagate()141 void ReLUNode::forwardPropagate()
142 {
143 #ifdef DEBUG
144   float* bot = (float*)(tenBotData_->getBuffer());
145   float* top = (float*)(tenTopData_->getBuffer());
146 
147   printf("Executing FP %s: input %p, output %p\n",NNNode::nname_.c_str(), bot, top);
148   if(gparams_.bdims > 4)
149     printf("Inputs: %d x %d x %d x %d\n",gparams_.nInput, gparams_.iDepth, gparams_.iHeight, gparams_.iWidth);
150   else if(gparams_.bdims > 3)
151     printf("Inputs: %d x %d x %d\n",gparams_.nInput, gparams_.iHeight, gparams_.iWidth);
152 
153   if(gparams_.tdims > 4)
154     printf("Outputs: %d x %d x %d x %d\n",gparams_.nOutput, gparams_.oDepth, gparams_.oHeight, gparams_.oWidth);
155   else if(gparams_.tdims > 3)
156     printf("Outputs: %d x %d x %d\n",gparams_.nOutput, gparams_.oHeight, gparams_.oWidth);
157 #endif
158 
159   impl->set_bot_compute_engine(bot_cengine_);
160   impl->set_top_compute_engine(top_compute_engine_);
161   impl->forwardPropagate(tenBotData_, tenTopData_);
162 
163 #ifdef GETSTATS
164 #ifdef USE_MLSL
165   size_t node_id = MLSL::Environment::GetEnv().GetProcessIdx();
166 #else
167   size_t node_id = 0;
168 #endif
169   if(node_id == 0 && eptr_->get_current_batch() % STATFREQ == 0)
170   {
171     float *ptr = (float*)tenBotData_->getBuffer();
172     float *pptr = (float*)tenBotData_->getPrivBuffer();
173     float *p = (pptr == NULL) ? ptr : pptr;
174     string s = nname_ + "_Inp";
175     MeanOfLayer((char*)s.c_str(), p, gparams_.batch_size*gparams_.nInput* gparams_.iHeight*gparams_.iWidth);
176 
177     ptr = (float*)tenTopData_->getBuffer();
178     pptr = (float*)tenTopData_->getPrivBuffer();
179     p = (pptr == NULL) ? ptr : pptr;
180     s = nname_ + "_Outp";
181     MeanOfLayer((char*)s.c_str(), p, gparams_.batch_size*gparams_.nOutput* gparams_.oHeight*gparams_.oWidth);
182   }
183 #endif
184 }
185 
backPropagate()186 void ReLUNode::backPropagate()
187 {
188 
189   tenTopDiff_ = tenTop_->getBuf(DIFF);
190 
191 #ifdef DEBUG
192   float *gtop = (float*)(tenTopDiff_->getBuffer());
193   assert(gtop != NULL);
194   float* gbot = (float*)(tenBotDiff_->getBuffer());
195   float* bot = (float*)(tenBotData_->getBuffer());
196 
197   printf("Executing BP %s: grad_output %p, grad_input %p\n",NNNode::nname_.c_str(), gtop, gbot);
198   if(gparams_.bdims > 4)
199   {
200     printf("Inputs: %d x %d x %d x %d\n",gparams_.nInput, gparams_.iDepth, gparams_.iHeight, gparams_.iWidth);
201     printf("Grad Inputs: %d x %d x %d x %d\n",gparams_.nInput, gparams_.iDepth, gparams_.iHeight, gparams_.iWidth);
202   }
203   else if(gparams_.bdims > 3)
204   {
205     printf("Inputs: %d x %d x %d\n",gparams_.nInput, gparams_.iHeight, gparams_.iWidth);
206     printf("Grad Inputs: %d x %d x %d\n",gparams_.nInput, gparams_.iHeight, gparams_.iWidth);
207   }
208 
209   if(gparams_.tdims > 4)
210     printf("Grad Outputs: %d x %d x %d x %d\n",gparams_.nOutput, gparams_.oDepth, gparams_.oHeight, gparams_.oWidth);
211   else if(gparams_.tdims > 3)
212     printf("Grad Outputs: %d x %d x %d\n",gparams_.nOutput, gparams_.oHeight, gparams_.oWidth);
213 #endif
214 
215   impl->backPropagate(tenBotData_, tenTopDiff_, tenBotDiff_);
216 
217 #ifdef GETSTATS
218 #ifdef USE_MLSL
219   size_t node_id = MLSL::Environment::GetEnv().GetProcessIdx();
220 #else
221   size_t node_id = 0;
222 #endif
223   if(node_id == 0 && eptr_->get_current_batch() % STATFREQ == 0)
224   {
225     float *ptr = (float*)tenTopDiff_->getBuffer();
226     float *pptr = (float*)tenTopDiff_->getPrivBuffer();
227     float *p = (pptr == NULL) ? ptr : pptr;
228 
229     string s = nname_ + "_delOutp";
230     MeanOfLayer((char*)s.c_str(), p, gparams_.batch_size*gparams_.nOutput* gparams_.oHeight*gparams_.oWidth);
231 
232     ptr = (float*)tenBotDiff_->getBuffer();
233     pptr = (float*)tenBotDiff_->getPrivBuffer();
234     p = (pptr == NULL) ? ptr : pptr;
235 
236     s = nname_ + "_delInp";
237     MeanOfLayer((char*)s.c_str(), p, gparams_.batch_size*gparams_.nInput* gparams_.iHeight*gparams_.iWidth);
238   }
239 #endif
240 }
241 
242