1 /******************************************************************************
2 * Copyright (c) Intel Corporation - All rights reserved. *
3 * This file is part of the LIBXSMM library. *
4 * *
5 * For information on the license, see the LICENSE file. *
6 * Further information: https://github.com/hfp/libxsmm/ *
7 * SPDX-License-Identifier: BSD-3-Clause *
8 ******************************************************************************/
9 /* Sasikanth Avancha, Dhiraj Kalamkar (Intel Corp.)
10 ******************************************************************************/
11
12
13 #include <string>
14 #include "ReLU.hpp"
15
16 using namespace std;
17 using namespace gxm;
18
/* Construct a ReLU node: create and register the output (top) activation
 * tensor, attach to the input (bottom) tensor produced by the upstream node,
 * size the forward data buffer (and, when training with backprop enabled,
 * the input-gradient buffer), then fill gparams_ and select a compute
 * engine via configure().
 *
 * p: parsed node parameters (names, dtype, engine, bprop flag, slope).
 * e: engine owning the tensor registry and global run configuration.  */
ReLUNode::ReLUNode(ReLUParams* p, MLEngine* e): NNNode(p, e)
{
  // Copy node identity/configuration out of the parameter object.
  nname_ = p->get_node_name();
  ntype_ = p->get_node_type();
  mode_ = p->get_mode();
  bottom_ = p->get_bottom_names();
  top_ = p->get_top_names();
  bp_flag_ = p->get_bprop_flag();
  has_weights_ = false;  // ReLU has no learnable parameters
  bot_compute_engine_ = p->get_compute_engine();

  // ReLU is strictly single-input / single-output.
  assert((bottom_.size() == 1) && (top_.size() == 1));

  // Create the output activation tensor owned by this node.
  tenTop_ = new Tensor(top_[0]);
  assert(tenTop_ != NULL);
  tenTop_->setOwner(this);
  tenTop_->setType(ACT);
  tenTopData_ = tenTop_->getBuf(DATA);
  tenTopData_->setBufferType(DATA);

#ifdef DEBUG
  printf("bottom name %s\n",bottom_[0].c_str());
#endif

  // Look up the input activation tensor registered by the upstream node and
  // link the two nodes, propagating the compute-engine choice both ways.
  tenBot_ = e->get_tensor(bottom_[0], ACT);
  assert(tenBot_ != NULL);
  NNNode *pnn = (NNNode*)tenBot_->getOwner();
  setPrevNode(pnn);
  pnn->set_top_compute_engine(p->get_compute_engine());
  bot_cengine_ = pnn->get_bot_compute_engine();

  tenBotData_ = tenBot_->getBuf(DATA);

  //Output tensor data type = input tensor data type
  int dtype = p->get_data_type();
  tenTopData_->setDataType(dtype);

  // Get input tensor shape (bottom); ReLU is elementwise, so the output
  // tensor shares the input's shape object.
  Shape* bs = tenBot_->getShape();
  assert(bs->ndims <= MAX_DIMS);

  tenTop_->setShape(bs);

  // Total element count of the flattened tensor.
  long long int tsize = 1;
  for(int i=0; i<bs->ndims; i++)
    tsize = tsize*bs->dims[i];

  // Scale element count to bytes. Only FLOAT/INT are handled; any other
  // dtype leaves tsize as a raw element count.
  if(dtype == DT_FLOAT)
    tsize = tsize*sizeof(float);
  else if(dtype == DT_INT)
    tsize = tsize*sizeof(int);

  // Set the logical size of the tensor buffer for bufId=0 (forward data buffer).
  // Note: we have no knowledge of the machine parameters here, so effectively this is single-machine config
  tenTopData_->setBufferSize(tsize);

  if(!e->is_inference_only())
  {
    if(bp_flag_)
    {
      // Training with backprop: add an input-gradient (DIFF) buffer on the
      // bottom tensor, same dtype and element count as the forward data.
      tenBotDiff_ = tenBot_->addBuf(); // DIFF type and index
      tenBotDiff_->setDataType(dtype);
      tenBotDiff_->setBufferType(DIFF);

      long long int bsize = 1;
      for(int i=0; i<bs->ndims; i++)
        bsize = bsize*bs->dims[i];
      if(dtype == DT_FLOAT)
        bsize = bsize*sizeof(float);
      else if(dtype == DT_INT)
        bsize = bsize*sizeof(int);

      // Set the size of the input-gradient buffer
      tenBotDiff_->setBufferSize(bsize);
    }
    // NOTE(review): when training with bp_flag_ == false, tenBotDiff_ is not
    // assigned on this path — presumably default-initialized in the class
    // declaration; verify against ReLU.hpp.
  }
  else
    tenBotDiff_ = NULL;  // inference-only: no gradient buffer

  // Register output tensor in tensor map
  bool inserted = e->register_tensor(top_[0], ACT, tenTop_);
  if(!inserted)
    printf("Warning: Tensor %s already registered\n",top_[0].c_str());

  // Fill the implementation-parameter struct consumed by the backend.
  // Input and output geometry are identical for an elementwise op.
  gparams_.bdims = gparams_.tdims = bs->ndims;
  gparams_.batch_size = bs->dims[0];
  gparams_.node_name = nname_;
  gparams_.nInput = bs->dims[1];
  gparams_.nOutput = gparams_.nInput;
  if(bs->ndims == 5)
  {
    // NOTE(review): the 5-D case assigns dims[2] to all input spatial
    // extents and dims[3] to all output extents, unlike the 4-D case which
    // reads dims[2]/dims[3] separately — confirm this is intended and not
    // meant to read dims[2]/dims[3]/dims[4] per axis.
    gparams_.iDepth = gparams_.iHeight = gparams_.iWidth = bs->dims[2];
    gparams_.oDepth = gparams_.oHeight = gparams_.oWidth = bs->dims[3];
  }
  else if(bs->ndims == 4)
  {
    gparams_.iDepth = gparams_.oDepth = 0;  // no depth axis in 4-D tensors
    gparams_.iHeight = gparams_.oHeight = bs->dims[2];
    gparams_.iWidth = gparams_.oWidth = bs->dims[3];
  }

  // Leaky-ReLU slope for negative inputs (0 => standard ReLU).
  gparams_.negative_slope = p->get_negative_slope();

  gparams_.data_type = dtype;
  gparams_.algType = p->get_algo_type();
  gparams_.num_threads = e->get_num_threads();

  // Instantiate the engine-specific implementation (sets impl).
  configure(p->get_compute_engine());

  eptr_ = e;
};
130
configure(int engine)131 void ReLUNode::configure(int engine)
132 {
133 switch(engine)
134 {
135 case XSMM:
136 impl = new ReLUXSMM(&gparams_, engine);
137 break;
138 }
139 }
140
/* Forward pass: run the ReLU kernel, reading the bottom (input) data buffer
 * and writing the top (output) data buffer. The compute itself is delegated
 * to the engine-specific implementation selected in configure(). */
void ReLUNode::forwardPropagate()
{
#ifdef DEBUG
  float* bot = (float*)(tenBotData_->getBuffer());
  float* top = (float*)(tenTopData_->getBuffer());

  printf("Executing FP %s: input %p, output %p\n",NNNode::nname_.c_str(), bot, top);
  // bdims/tdims > 4 means a 5-D (depth-bearing) tensor; > 3 means 4-D.
  if(gparams_.bdims > 4)
    printf("Inputs: %d x %d x %d x %d\n",gparams_.nInput, gparams_.iDepth, gparams_.iHeight, gparams_.iWidth);
  else if(gparams_.bdims > 3)
    printf("Inputs: %d x %d x %d\n",gparams_.nInput, gparams_.iHeight, gparams_.iWidth);

  if(gparams_.tdims > 4)
    printf("Outputs: %d x %d x %d x %d\n",gparams_.nOutput, gparams_.oDepth, gparams_.oHeight, gparams_.oWidth);
  else if(gparams_.tdims > 3)
    printf("Outputs: %d x %d x %d\n",gparams_.nOutput, gparams_.oHeight, gparams_.oWidth);
#endif

  // Tell the implementation which engines produced the input and consume the
  // output (so it can match buffer layouts), then run the kernel.
  impl->set_bot_compute_engine(bot_cengine_);
  impl->set_top_compute_engine(top_compute_engine_);
  impl->forwardPropagate(tenBotData_, tenTopData_);

#ifdef GETSTATS
#ifdef USE_MLSL
  size_t node_id = MLSL::Environment::GetEnv().GetProcessIdx();  // rank in a multi-process run
#else
  size_t node_id = 0;
#endif
  // Periodically (every STATFREQ batches) dump mean statistics of the input
  // and output activations, on rank 0 only.
  if(node_id == 0 && eptr_->get_current_batch() % STATFREQ == 0)
  {
    // Prefer the private (engine-internal) buffer when one exists.
    float *ptr = (float*)tenBotData_->getBuffer();
    float *pptr = (float*)tenBotData_->getPrivBuffer();
    float *p = (pptr == NULL) ? ptr : pptr;
    string s = nname_ + "_Inp";
    // NOTE(review): the element count omits iDepth, so for 5-D tensors this
    // covers only one depth slice — confirm intended.
    MeanOfLayer((char*)s.c_str(), p, gparams_.batch_size*gparams_.nInput* gparams_.iHeight*gparams_.iWidth);

    ptr = (float*)tenTopData_->getBuffer();
    pptr = (float*)tenTopData_->getPrivBuffer();
    p = (pptr == NULL) ? ptr : pptr;
    s = nname_ + "_Outp";
    MeanOfLayer((char*)s.c_str(), p, gparams_.batch_size*gparams_.nOutput* gparams_.oHeight*gparams_.oWidth);
  }
#endif
}
185
/* Backward pass: compute the input gradient (tenBotDiff_) from the output
 * gradient (tenTopDiff_) and the saved forward input (tenBotData_). The
 * compute is delegated to the engine-specific implementation. */
void ReLUNode::backPropagate()
{

  // The top-gradient buffer is produced by the downstream node; fetch it
  // fresh on every call rather than caching it at construction time.
  tenTopDiff_ = tenTop_->getBuf(DIFF);

#ifdef DEBUG
  float *gtop = (float*)(tenTopDiff_->getBuffer());
  assert(gtop != NULL);
  // NOTE(review): tenBotDiff_ may be unset/NULL when backprop was disabled
  // (see constructor) — this DEBUG deref assumes backPropagate is only
  // called when the gradient buffer exists; verify.
  float* gbot = (float*)(tenBotDiff_->getBuffer());
  float* bot = (float*)(tenBotData_->getBuffer());

  printf("Executing BP %s: grad_output %p, grad_input %p\n",NNNode::nname_.c_str(), gtop, gbot);
  // bdims/tdims > 4 means a 5-D (depth-bearing) tensor; > 3 means 4-D.
  if(gparams_.bdims > 4)
  {
    printf("Inputs: %d x %d x %d x %d\n",gparams_.nInput, gparams_.iDepth, gparams_.iHeight, gparams_.iWidth);
    printf("Grad Inputs: %d x %d x %d x %d\n",gparams_.nInput, gparams_.iDepth, gparams_.iHeight, gparams_.iWidth);
  }
  else if(gparams_.bdims > 3)
  {
    printf("Inputs: %d x %d x %d\n",gparams_.nInput, gparams_.iHeight, gparams_.iWidth);
    printf("Grad Inputs: %d x %d x %d\n",gparams_.nInput, gparams_.iHeight, gparams_.iWidth);
  }

  if(gparams_.tdims > 4)
    printf("Grad Outputs: %d x %d x %d x %d\n",gparams_.nOutput, gparams_.oDepth, gparams_.oHeight, gparams_.oWidth);
  else if(gparams_.tdims > 3)
    printf("Grad Outputs: %d x %d x %d\n",gparams_.nOutput, gparams_.oHeight, gparams_.oWidth);
#endif

  // Run the backward kernel: grad_in = f(forward input, grad_out).
  impl->backPropagate(tenBotData_, tenTopDiff_, tenBotDiff_);

#ifdef GETSTATS
#ifdef USE_MLSL
  size_t node_id = MLSL::Environment::GetEnv().GetProcessIdx();  // rank in a multi-process run
#else
  size_t node_id = 0;
#endif
  // Periodically (every STATFREQ batches) dump mean statistics of the
  // incoming and outgoing gradients, on rank 0 only.
  if(node_id == 0 && eptr_->get_current_batch() % STATFREQ == 0)
  {
    // Prefer the private (engine-internal) buffer when one exists.
    float *ptr = (float*)tenTopDiff_->getBuffer();
    float *pptr = (float*)tenTopDiff_->getPrivBuffer();
    float *p = (pptr == NULL) ? ptr : pptr;

    string s = nname_ + "_delOutp";
    MeanOfLayer((char*)s.c_str(), p, gparams_.batch_size*gparams_.nOutput* gparams_.oHeight*gparams_.oWidth);

    ptr = (float*)tenBotDiff_->getBuffer();
    pptr = (float*)tenBotDiff_->getPrivBuffer();
    p = (pptr == NULL) ? ptr : pptr;

    s = nname_ + "_delInp";
    MeanOfLayer((char*)s.c_str(), p, gparams_.batch_size*gparams_.nInput* gparams_.iHeight*gparams_.iWidth);
  }
#endif
}
241
242