// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#include "normalize.h"

#include <math.h>

#include <algorithm> // for std::max

namespace ncnn {

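// Normalize rescales the input blob so that the L2 norm over a chosen region
// becomes 1, then multiplies by a learned scale vector. In the caffe/mxnet
// convention this computes y = x * scale / sqrt(sum(x^2) + eps), where the
// sum runs over all spatial positions and/or all channels depending on
// across_spatial and across_channel.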
Normalize::Normalize()
{
    one_blob_only = true;   // one input blob, one output blob
    support_inplace = true; // the output may overwrite the input
}

int Normalize::load_param(const ParamDict& pd)
{
    across_spatial = pd.get(0, 0);
    across_channel = pd.get(4, 1);
    channel_shared = pd.get(1, 0);
    eps = pd.get(2, 0.0001f);
    eps_mode = pd.get(9, 0); // 0=caffe/mxnet, 1=pytorch, 2=tensorflow
    scale_data_size = pd.get(3, 0);

    return 0;
}
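
// A Normalize layer entry in a .param file might look like this (hypothetical
// blob names and values, following the param ids read above):
//   Normalize conv4_3_norm 1 1 conv4_3 conv4_3_norm 1=0 2=1.000000e-04 3=512
// i.e. per-channel scales (channel_shared=0), eps=1e-4, 512 scale weights.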

int Normalize::load_model(const ModelBin& mb)
{
    scale_data = mb.load(scale_data_size, 1);
    if (scale_data.empty())
        return -100;

    return 0;
}
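
// forward_inplace dispatches on (across_spatial, across_channel):
//   both set     -> one norm over the entire blob
//   spatial only -> one norm per channel
//   channel only -> one norm per spatial position
//   neither      -> the blob is returned unchanged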
int Normalize::forward_inplace(Mat& bottom_top_blob, const Option& opt) const
{
    int w = bottom_top_blob.w;
    int h = bottom_top_blob.h;
    int channels = bottom_top_blob.c;
    size_t elemsize = bottom_top_blob.elemsize;
    int size = w * h;

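    // case 1: a single norm over all channels and all spatial positions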
    if (across_spatial && across_channel)
    {
        // per-channel sums of squares, combined into one total below
        Mat square_sum_blob;
        square_sum_blob.create(channels, elemsize, opt.workspace_allocator);
        if (square_sum_blob.empty())
            return -100;

        #pragma omp parallel for num_threads(opt.num_threads)
        for (int q = 0; q < channels; q++)
        {
            const float* ptr = bottom_top_blob.channel(q);

            float ssum = 0.f;
            for (int i = 0; i < size; i++)
            {
                ssum += ptr[i] * ptr[i];
            }

            square_sum_blob[q] = ssum;
        }

        float ssum = 0.f;
        for (int q = 0; q < channels; q++)
        {
            ssum += square_sum_blob[q];
        }

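        // turn the total into a reciprocal norm, guarding against a zero
        // denominator the way the source framework does: caffe/mxnet add eps
        // inside the sqrt, pytorch clamps the norm itself, tensorflow clamps
        // the squared sum before the sqrt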
        float a;
        if (eps_mode == 0) // caffe/mxnet
        {
            a = static_cast<float>(1.f / sqrt(ssum + eps));
        }
        else if (eps_mode == 1) // pytorch
        {
            a = 1.f / std::max((float)sqrt(ssum), eps);
        }
        else //if (eps_mode == 2) // tensorflow
        {
            a = static_cast<float>(1.f / sqrt(std::max(ssum, eps)));
        }

        if (channel_shared)
        {
            float scale = a * scale_data[0];

            #pragma omp parallel for num_threads(opt.num_threads)
            for (int q = 0; q < channels; q++)
            {
                float* ptr = bottom_top_blob.channel(q);

                for (int i = 0; i < size; i++)
                {
                    ptr[i] = ptr[i] * scale;
                }
            }
        }
        else
        {
            #pragma omp parallel for num_threads(opt.num_threads)
            for (int q = 0; q < channels; q++)
            {
                float* ptr = bottom_top_blob.channel(q);
                float scale = a * scale_data[q];

                for (int i = 0; i < size; i++)
                {
                    ptr[i] = ptr[i] * scale;
                }
            }
        }

        return 0;
    }

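    // case 2: one norm per channel, taken over that channel's spatial positions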
    if (across_spatial && !across_channel)
    {
        #pragma omp parallel for num_threads(opt.num_threads)
        for (int q = 0; q < channels; q++)
        {
            float* ptr = bottom_top_blob.channel(q);

            float ssum = 0.f;
            for (int i = 0; i < size; i++)
            {
                ssum += ptr[i] * ptr[i];
            }

            float a;
            if (eps_mode == 0) // caffe/mxnet
            {
                a = static_cast<float>(1.f / sqrt(ssum + eps));
            }
            else if (eps_mode == 1) // pytorch
            {
                a = 1.f / std::max((float)sqrt(ssum), eps);
            }
            else //if (eps_mode == 2) // tensorflow
            {
                a = static_cast<float>(1.f / sqrt(std::max(ssum, eps)));
            }

            float scale = a * (channel_shared ? scale_data[0] : scale_data[q]);

            for (int i = 0; i < size; i++)
            {
                ptr[i] = ptr[i] * scale;
            }
        }

        return 0;
    }

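    // case 3: one norm per spatial position, taken across channels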
    if (!across_spatial && across_channel)
    {
        // square_sum_blob holds one value per spatial position: first the sum
        // of squares, then the reciprocal norm 1/sqrt(ssum) derived from it
        Mat square_sum_blob;
        square_sum_blob.create(size, elemsize, opt.workspace_allocator);
        if (square_sum_blob.empty())
            return -100;

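        // a shared scale can be folded into the per-position factor up front;
        // per-channel scales must wait for the second pass over channels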
        if (channel_shared)
        {
            float scale = scale_data[0];

            #pragma omp parallel for num_threads(opt.num_threads)
            for (int i = 0; i < size; i++)
            {
                float ssum = 0.f;
                for (int q = 0; q < channels; q++)
                {
                    const float* ptr = bottom_top_blob.channel(q);
                    ssum += ptr[i] * ptr[i];
                }

                float a;
                if (eps_mode == 0) // caffe/mxnet
                {
                    a = static_cast<float>(1.f / sqrt(ssum + eps));
                }
                else if (eps_mode == 1) // pytorch
                {
                    a = 1.f / std::max((float)sqrt(ssum), eps);
                }
                else //if (eps_mode == 2) // tensorflow
                {
                    a = static_cast<float>(1.f / sqrt(std::max(ssum, eps)));
                }

                square_sum_blob[i] = a * scale;
            }

            #pragma omp parallel for num_threads(opt.num_threads)
            for (int q = 0; q < channels; q++)
            {
                float* ptr = bottom_top_blob.channel(q);

                for (int i = 0; i < size; i++)
                {
                    ptr[i] = ptr[i] * square_sum_blob[i];
                }
            }
        }
        else
        {
            #pragma omp parallel for num_threads(opt.num_threads)
            for (int i = 0; i < size; i++)
            {
                float ssum = 0.f;
                for (int q = 0; q < channels; q++)
                {
                    const float* ptr = bottom_top_blob.channel(q);
                    ssum += ptr[i] * ptr[i];
                }

                float a;
                if (eps_mode == 0) // caffe/mxnet
                {
                    a = static_cast<float>(1.f / sqrt(ssum + eps));
                }
                else if (eps_mode == 1) // pytorch
                {
                    a = 1.f / std::max((float)sqrt(ssum), eps);
                }
                else //if (eps_mode == 2) // tensorflow
                {
                    a = static_cast<float>(1.f / sqrt(std::max(ssum, eps)));
                }

                square_sum_blob[i] = a;
            }

            #pragma omp parallel for num_threads(opt.num_threads)
            for (int q = 0; q < channels; q++)
            {
                float* ptr = bottom_top_blob.channel(q);
                float scale = scale_data[q];

                for (int i = 0; i < size; i++)
                {
                    ptr[i] = ptr[i] * square_sum_blob[i] * scale;
                }
            }
        }

        return 0;
    }

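    // neither across_spatial nor across_channel: no region to normalize over,
    // so the blob is left unchanged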
    return 0;
}

} // namespace ncnn