1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // author:BUG1989 (https://github.com/BUG1989/) Long-term support.
4 // author:JansonZhu (https://github.com/JansonZhu) Implemented the function of entropy calibration.
5 //
6 // Copyright (C) 2019 BUG1989. All rights reserved.
7 // Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
8 //
9 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
10 // in compliance with the License. You may obtain a copy of the License at
11 //
12 // https://opensource.org/licenses/BSD-3-Clause
13 //
14 // Unless required by applicable law or agreed to in writing, software distributed
15 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
16 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
17 // specific language governing permissions and limitations under the License.
18
19 #ifdef _MSC_VER
20 #define _CRT_SECURE_NO_DEPRECATE
21 #endif
22
23 #include <float.h>
24 #include <limits.h>
25 #include <math.h>
26 #include <stdio.h>
27 #include <stdint.h>
28 #include <stdlib.h>
29 #include <string.h>
30
31 #if defined(USE_NCNN_SIMPLEOCV)
32 #include "simpleocv.h"
33 #elif defined(USE_LOCAL_IMREADWRITE)
34 #include "imreadwrite.h"
35 #else
36 #include <opencv2/core/core.hpp>
37 #include <opencv2/highgui/highgui.hpp>
38 #endif
39 #include <string>
40 #include <vector>
41
42 // ncnn public header
43 #include "benchmark.h"
44 #include "cpu.h"
45 #include "net.h"
46
47 // ncnn private header
48 #include "layer/convolution.h"
49 #include "layer/convolutiondepthwise.h"
50 #include "layer/innerproduct.h"
51
// Calibration statistics collected for one convolution-like layer's bottom blob.
class QuantBlobStat
{
public:
    QuantBlobStat()
        : threshold(0.f), absmax(0.f), total(0)
    {
    }

public:
    // clipping threshold selected by the calibration method
    float threshold;
    // largest absolute activation value observed over the calibration images
    float absmax;

    // ACIQ: number of elements in the blob
    int total;

    // KL: raw bin counts and their normalized form
    std::vector<uint64_t> histogram;
    std::vector<float> histogram_normed;
};
73
74 class QuantNet : public ncnn::Net
75 {
76 public:
77 QuantNet();
78
79 std::vector<ncnn::Blob>& blobs;
80 std::vector<ncnn::Layer*>& layers;
81
82 public:
83 std::vector<std::vector<std::string> > listspaths;
84 std::vector<std::vector<float> > means;
85 std::vector<std::vector<float> > norms;
86 std::vector<std::vector<int> > shapes;
87 std::vector<int> type_to_pixels;
88 int quantize_num_threads;
89
90 public:
91 int init();
92 void print_quant_info() const;
93 int save_table(const char* tablepath);
94 int quantize_KL();
95 int quantize_ACIQ();
96 int quantize_EQ();
97
98 public:
99 std::vector<int> input_blobs;
100 std::vector<int> conv_layers;
101 std::vector<int> conv_bottom_blobs;
102 std::vector<int> conv_top_blobs;
103
104 // result
105 std::vector<QuantBlobStat> quant_blob_stats;
106 std::vector<ncnn::Mat> weight_scales;
107 std::vector<ncnn::Mat> bottom_blob_scales;
108 };
109
QuantNet()110 QuantNet::QuantNet()
111 : blobs(mutable_blobs()), layers(mutable_layers())
112 {
113 quantize_num_threads = ncnn::get_cpu_count();
114 }
115
init()116 int QuantNet::init()
117 {
118 // find all input layers
119 for (int i = 0; i < (int)layers.size(); i++)
120 {
121 const ncnn::Layer* layer = layers[i];
122 if (layer->type == "Input")
123 {
124 input_blobs.push_back(layer->tops[0]);
125 }
126 }
127
128 // find all conv layers
129 for (int i = 0; i < (int)layers.size(); i++)
130 {
131 const ncnn::Layer* layer = layers[i];
132 if (layer->type == "Convolution" || layer->type == "ConvolutionDepthWise" || layer->type == "InnerProduct")
133 {
134 conv_layers.push_back(i);
135 conv_bottom_blobs.push_back(layer->bottoms[0]);
136 conv_top_blobs.push_back(layer->tops[0]);
137 }
138 }
139
140 const int conv_layer_count = (int)conv_layers.size();
141 const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
142
143 quant_blob_stats.resize(conv_bottom_blob_count);
144 weight_scales.resize(conv_layer_count);
145 bottom_blob_scales.resize(conv_bottom_blob_count);
146
147 return 0;
148 }
149
save_table(const char * tablepath)150 int QuantNet::save_table(const char* tablepath)
151 {
152 FILE* fp = fopen(tablepath, "wb");
153 if (!fp)
154 {
155 fprintf(stderr, "fopen %s failed\n", tablepath);
156 return -1;
157 }
158
159 const int conv_layer_count = (int)conv_layers.size();
160 const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
161
162 for (int i = 0; i < conv_layer_count; i++)
163 {
164 const ncnn::Mat& weight_scale = weight_scales[i];
165
166 fprintf(fp, "%s_param_0 ", layers[conv_layers[i]]->name.c_str());
167 for (int j = 0; j < weight_scale.w; j++)
168 {
169 fprintf(fp, "%f ", weight_scale[j]);
170 }
171 fprintf(fp, "\n");
172 }
173
174 for (int i = 0; i < conv_bottom_blob_count; i++)
175 {
176 const ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i];
177
178 fprintf(fp, "%s ", layers[conv_layers[i]]->name.c_str());
179 for (int j = 0; j < bottom_blob_scale.w; j++)
180 {
181 fprintf(fp, "%f ", bottom_blob_scale[j]);
182 }
183 fprintf(fp, "\n");
184 }
185
186 fclose(fp);
187
188 fprintf(stderr, "ncnn int8 calibration table create success, best wish for your int8 inference has a low accuracy loss...\\(^0^)/...233...\n");
189
190 return 0;
191 }
192
print_quant_info() const193 void QuantNet::print_quant_info() const
194 {
195 for (int i = 0; i < (int)conv_bottom_blobs.size(); i++)
196 {
197 const QuantBlobStat& stat = quant_blob_stats[i];
198
199 float scale = 127 / stat.threshold;
200
201 fprintf(stderr, "%-40s : max = %-15f threshold = %-15f scale = %-15f\n", layers[conv_layers[i]]->name.c_str(), stat.absmax, stat.threshold, scale);
202 }
203 }
204
205 /**
206 * Read and resize image
207 * shape is input as [w,h,...]
208 * if w and h both are given, image will be resized to exactly size.
209 * if w and h both are zero or negative, image will not be resized.
210 * if only h is zero or negative, image's width will scaled resize to w, keeping aspect ratio.
211 * if only w is zero or negative, image's height will scaled resize to h
212 * @return ncnn::Mat
213 */
214
read_and_resize_image(const std::vector<int> & shape,const std::string & imagepath,int pixel_convert_type)215 inline ncnn::Mat read_and_resize_image(const std::vector<int>& shape, const std::string& imagepath, int pixel_convert_type)
216 {
217 int target_w = shape[0];
218 int target_h = shape[1];
219 cv::Mat bgr = cv::imread(imagepath, 1);
220 if (target_h <= 0 && target_w <= 0)
221 {
222 return ncnn::Mat::from_pixels(bgr.data, pixel_convert_type, bgr.cols, bgr.rows);
223 }
224 if (target_h <= 0 || target_w <= 0)
225 {
226 float scale = 1.0;
227 if (target_h <= 0)
228 {
229 scale = 1.0 * bgr.cols / target_w;
230 target_h = int(1.0 * bgr.rows / scale);
231 }
232 if (target_w <= 0)
233 {
234 scale = 1.0 * bgr.rows / target_h;
235 target_w = int(1.0 * bgr.cols / scale);
236 }
237 }
238 return ncnn::Mat::from_pixels_resize(bgr.data, pixel_convert_type, bgr.cols, bgr.rows, target_w, target_h);
239 }
240
// Kullback-Leibler divergence sum(a[i] * log(a[i] / b[i])) over all entries of a.
// b must hold at least a.size() entries; both are expected to be strictly positive.
static float compute_kl_divergence(const std::vector<float>& a, const std::vector<float>& b)
{
    float divergence = 0;

    std::vector<float>::const_iterator pa = a.begin();
    std::vector<float>::const_iterator pb = b.begin();
    for (; pa != a.end(); ++pa, ++pb)
    {
        divergence += *pa * log(*pa / *pb);
    }

    return divergence;
}
253
quantize_KL()254 int QuantNet::quantize_KL()
255 {
256 const int input_blob_count = (int)input_blobs.size();
257 const int conv_layer_count = (int)conv_layers.size();
258 const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
259 const int image_count = (int)listspaths[0].size();
260
261 const int num_histogram_bins = 2048;
262
263 std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
264 std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
265
266 // initialize conv weight scales
267 #pragma omp parallel for num_threads(quantize_num_threads)
268 for (int i = 0; i < conv_layer_count; i++)
269 {
270 const ncnn::Layer* layer = layers[conv_layers[i]];
271
272 if (layer->type == "Convolution")
273 {
274 const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer;
275
276 const int num_output = convolution->num_output;
277 const int kernel_w = convolution->kernel_w;
278 const int kernel_h = convolution->kernel_h;
279 const int dilation_w = convolution->dilation_w;
280 const int dilation_h = convolution->dilation_h;
281 const int stride_w = convolution->stride_w;
282 const int stride_h = convolution->stride_h;
283
284 const int weight_data_size_output = convolution->weight_data_size / num_output;
285
286 // int8 winograd F43 needs weight data to use 6bit quantization
287 // TODO proper condition for winograd 3x3 int8
288 bool quant_6bit = false;
289 if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
290 quant_6bit = true;
291
292 weight_scales[i].create(num_output);
293
294 for (int n = 0; n < num_output; n++)
295 {
296 const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);
297
298 float absmax = 0.f;
299 for (int k = 0; k < weight_data_size_output; k++)
300 {
301 absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
302 }
303
304 if (quant_6bit)
305 {
306 weight_scales[i][n] = 31 / absmax;
307 }
308 else
309 {
310 weight_scales[i][n] = 127 / absmax;
311 }
312 }
313 }
314
315 if (layer->type == "ConvolutionDepthWise")
316 {
317 const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer;
318
319 const int group = convolutiondepthwise->group;
320 const int weight_data_size_output = convolutiondepthwise->weight_data_size / group;
321
322 std::vector<float> scales;
323
324 weight_scales[i].create(group);
325
326 for (int n = 0; n < group; n++)
327 {
328 const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);
329
330 float absmax = 0.f;
331 for (int k = 0; k < weight_data_size_output; k++)
332 {
333 absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
334 }
335
336 weight_scales[i][n] = 127 / absmax;
337 }
338 }
339
340 if (layer->type == "InnerProduct")
341 {
342 const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer;
343
344 const int num_output = innerproduct->num_output;
345 const int weight_data_size_output = innerproduct->weight_data_size / num_output;
346
347 weight_scales[i].create(num_output);
348
349 for (int n = 0; n < num_output; n++)
350 {
351 const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);
352
353 float absmax = 0.f;
354 for (int k = 0; k < weight_data_size_output; k++)
355 {
356 absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
357 }
358
359 weight_scales[i][n] = 127 / absmax;
360 }
361 }
362 }
363
364 // count the absmax
365 #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
366 for (int i = 0; i < image_count; i++)
367 {
368 if (i % 100 == 0)
369 {
370 fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
371 }
372
373 ncnn::Extractor ex = create_extractor();
374
375 const int thread_num = ncnn::get_omp_thread_num();
376 ex.set_blob_allocator(&blob_allocators[thread_num]);
377 ex.set_workspace_allocator(&workspace_allocators[thread_num]);
378
379 for (int j = 0; j < input_blob_count; j++)
380 {
381 const int type_to_pixel = type_to_pixels[j];
382 const std::vector<float>& mean_vals = means[j];
383 const std::vector<float>& norm_vals = norms[j];
384
385 int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
386 if (type_to_pixel != pixel_convert_type)
387 {
388 pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
389 }
390
391 ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
392
393 in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
394
395 ex.input(input_blobs[j], in);
396 }
397
398 for (int j = 0; j < conv_bottom_blob_count; j++)
399 {
400 ncnn::Mat out;
401 ex.extract(conv_bottom_blobs[j], out);
402
403 // count absmax
404 {
405 float absmax = 0.f;
406
407 const int outc = out.c;
408 const int outsize = out.w * out.h;
409 for (int p = 0; p < outc; p++)
410 {
411 const float* ptr = out.channel(p);
412 for (int k = 0; k < outsize; k++)
413 {
414 absmax = std::max(absmax, (float)fabs(ptr[k]));
415 }
416 }
417
418 #pragma omp critical
419 {
420 QuantBlobStat& stat = quant_blob_stats[j];
421 stat.absmax = std::max(stat.absmax, absmax);
422 }
423 }
424 }
425 }
426
427 // initialize histogram
428 #pragma omp parallel for num_threads(quantize_num_threads)
429 for (int i = 0; i < conv_bottom_blob_count; i++)
430 {
431 QuantBlobStat& stat = quant_blob_stats[i];
432
433 stat.histogram.resize(num_histogram_bins, 0);
434 stat.histogram_normed.resize(num_histogram_bins, 0);
435 }
436
437 // build histogram
438 #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
439 for (int i = 0; i < image_count; i++)
440 {
441 if (i % 100 == 0)
442 {
443 fprintf(stderr, "build histogram %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
444 }
445
446 ncnn::Extractor ex = create_extractor();
447
448 const int thread_num = ncnn::get_omp_thread_num();
449 ex.set_blob_allocator(&blob_allocators[thread_num]);
450 ex.set_workspace_allocator(&workspace_allocators[thread_num]);
451
452 for (int j = 0; j < input_blob_count; j++)
453 {
454 const int type_to_pixel = type_to_pixels[j];
455 const std::vector<float>& mean_vals = means[j];
456 const std::vector<float>& norm_vals = norms[j];
457
458 int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
459 if (type_to_pixel != pixel_convert_type)
460 {
461 pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
462 }
463
464 ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
465
466 in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
467
468 ex.input(input_blobs[j], in);
469 }
470
471 for (int j = 0; j < conv_bottom_blob_count; j++)
472 {
473 ncnn::Mat out;
474 ex.extract(conv_bottom_blobs[j], out);
475
476 // count histogram bin
477 {
478 const float absmax = quant_blob_stats[j].absmax;
479
480 std::vector<uint64_t> histogram(num_histogram_bins, 0);
481
482 const int outc = out.c;
483 const int outsize = out.w * out.h;
484 for (int p = 0; p < outc; p++)
485 {
486 const float* ptr = out.channel(p);
487 for (int k = 0; k < outsize; k++)
488 {
489 if (ptr[k] == 0.f)
490 continue;
491
492 const int index = std::min((int)(fabs(ptr[k]) / absmax * num_histogram_bins), (num_histogram_bins - 1));
493
494 histogram[index] += 1;
495 }
496 }
497
498 #pragma omp critical
499 {
500 QuantBlobStat& stat = quant_blob_stats[j];
501
502 for (int k = 0; k < num_histogram_bins; k++)
503 {
504 stat.histogram[k] += histogram[k];
505 }
506 }
507 }
508 }
509 }
510
511 // using kld to find the best threshold value
512 #pragma omp parallel for num_threads(quantize_num_threads)
513 for (int i = 0; i < conv_bottom_blob_count; i++)
514 {
515 QuantBlobStat& stat = quant_blob_stats[i];
516
517 // normalize histogram bin
518 {
519 uint64_t sum = 0;
520 for (int j = 0; j < num_histogram_bins; j++)
521 {
522 sum += stat.histogram[j];
523 }
524
525 for (int j = 0; j < num_histogram_bins; j++)
526 {
527 stat.histogram_normed[j] = (float)(stat.histogram[j] / (double)sum);
528 }
529 }
530
531 const int target_bin = 128;
532
533 int target_threshold = target_bin;
534 float min_kl_divergence = FLT_MAX;
535
536 for (int threshold = target_bin; threshold < num_histogram_bins; threshold++)
537 {
538 const float kl_eps = 0.0001f;
539
540 std::vector<float> clip_distribution(threshold, kl_eps);
541 {
542 for (int j = 0; j < threshold; j++)
543 {
544 clip_distribution[j] += stat.histogram_normed[j];
545 }
546 for (int j = threshold; j < num_histogram_bins; j++)
547 {
548 clip_distribution[threshold - 1] += stat.histogram_normed[j];
549 }
550 }
551
552 const float num_per_bin = (float)threshold / target_bin;
553
554 std::vector<float> quantize_distribution(target_bin, 0.f);
555 {
556 {
557 const float end = num_per_bin;
558
559 const int right_lower = (int)floor(end);
560 const float right_scale = end - right_lower;
561
562 if (right_scale > 0)
563 {
564 quantize_distribution[0] += right_scale * stat.histogram_normed[right_lower];
565 }
566
567 for (int k = 0; k < right_lower; k++)
568 {
569 quantize_distribution[0] += stat.histogram_normed[k];
570 }
571
572 quantize_distribution[0] /= right_lower + right_scale;
573 }
574 for (int j = 1; j < target_bin - 1; j++)
575 {
576 const float start = j * num_per_bin;
577 const float end = (j + 1) * num_per_bin;
578
579 const int left_upper = (int)ceil(start);
580 const float left_scale = left_upper - start;
581
582 const int right_lower = (int)floor(end);
583 const float right_scale = end - right_lower;
584
585 if (left_scale > 0)
586 {
587 quantize_distribution[j] += left_scale * stat.histogram_normed[left_upper - 1];
588 }
589
590 if (right_scale > 0)
591 {
592 quantize_distribution[j] += right_scale * stat.histogram_normed[right_lower];
593 }
594
595 for (int k = left_upper; k < right_lower; k++)
596 {
597 quantize_distribution[j] += stat.histogram_normed[k];
598 }
599
600 quantize_distribution[j] /= right_lower - left_upper + left_scale + right_scale;
601 }
602 {
603 const float start = threshold - num_per_bin;
604
605 const int left_upper = (int)ceil(start);
606 const float left_scale = left_upper - start;
607
608 if (left_scale > 0)
609 {
610 quantize_distribution[target_bin - 1] += left_scale * stat.histogram_normed[left_upper - 1];
611 }
612
613 for (int k = left_upper; k < threshold; k++)
614 {
615 quantize_distribution[target_bin - 1] += stat.histogram_normed[k];
616 }
617
618 quantize_distribution[target_bin - 1] /= threshold - left_upper + left_scale;
619 }
620 }
621
622 std::vector<float> expand_distribution(threshold, kl_eps);
623 {
624 {
625 const float end = num_per_bin;
626
627 const int right_lower = (int)floor(end);
628 const float right_scale = end - right_lower;
629
630 if (right_scale > 0)
631 {
632 expand_distribution[right_lower] += right_scale * quantize_distribution[0];
633 }
634
635 for (int k = 0; k < right_lower; k++)
636 {
637 expand_distribution[k] += quantize_distribution[0];
638 }
639 }
640 for (int j = 1; j < target_bin - 1; j++)
641 {
642 const float start = j * num_per_bin;
643 const float end = (j + 1) * num_per_bin;
644
645 const int left_upper = (int)ceil(start);
646 const float left_scale = left_upper - start;
647
648 const int right_lower = (int)floor(end);
649 const float right_scale = end - right_lower;
650
651 if (left_scale > 0)
652 {
653 expand_distribution[left_upper - 1] += left_scale * quantize_distribution[j];
654 }
655
656 if (right_scale > 0)
657 {
658 expand_distribution[right_lower] += right_scale * quantize_distribution[j];
659 }
660
661 for (int k = left_upper; k < right_lower; k++)
662 {
663 expand_distribution[k] += quantize_distribution[j];
664 }
665 }
666 {
667 const float start = threshold - num_per_bin;
668
669 const int left_upper = (int)ceil(start);
670 const float left_scale = left_upper - start;
671
672 if (left_scale > 0)
673 {
674 expand_distribution[left_upper - 1] += left_scale * quantize_distribution[target_bin - 1];
675 }
676
677 for (int k = left_upper; k < threshold; k++)
678 {
679 expand_distribution[k] += quantize_distribution[target_bin - 1];
680 }
681 }
682 }
683
684 // kl
685 const float kl_divergence = compute_kl_divergence(clip_distribution, expand_distribution);
686
687 // the best num of bin
688 if (kl_divergence < min_kl_divergence)
689 {
690 min_kl_divergence = kl_divergence;
691 target_threshold = threshold;
692 }
693 }
694
695 stat.threshold = (target_threshold + 0.5f) * stat.absmax / num_histogram_bins;
696 float scale = 127 / stat.threshold;
697
698 bottom_blob_scales[i].create(1);
699 bottom_blob_scales[i][0] = scale;
700 }
701
702 return 0;
703 }
704
// ACIQ clipping threshold under a gaussian assumption.
//
// @param absmax    largest absolute value observed
// @param N         number of samples absmax was taken over
// @param num_bits  quantization bit width, 1..8 (values outside are clamped)
// @return alpha_gaussian[num_bits - 1] * sigma, where sigma is estimated from
//         absmax and N; when N <= 1 the estimate is undefined (log(N) <= 0)
//         and absmax itself is returned, i.e. no clipping
static float compute_aciq_gaussian_clip(float absmax, int N, int num_bits = 8)
{
    // clipping multiplier, indexed by num_bits - 1
    const float alpha_gaussian[8] = {0, 1.71063519, 2.15159277, 2.55913646, 2.93620062, 3.28691474, 3.6151146, 3.92403714};

    // keep the table lookup in bounds
    if (num_bits < 1)
        num_bits = 1;
    if (num_bits > 8)
        num_bits = 8;

    // sqrt(2 * log(N)) is 0 for N == 1 and NaN for N == 0, which would yield an inf/NaN threshold
    if (N <= 1)
        return absmax;

    const double gaussian_const = (0.5 * 0.35) * (1 + sqrt(3.14159265358979323846 * log(4)));

    const double sigma = (absmax * 2 * gaussian_const) / sqrt(2 * log(N));

    return (float)(alpha_gaussian[num_bits - 1] * sigma);
}
715
quantize_ACIQ()716 int QuantNet::quantize_ACIQ()
717 {
718 const int input_blob_count = (int)input_blobs.size();
719 const int conv_layer_count = (int)conv_layers.size();
720 const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
721 const int image_count = (int)listspaths[0].size();
722
723 std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
724 std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
725
726 // initialize conv weight scales
727 #pragma omp parallel for num_threads(quantize_num_threads)
728 for (int i = 0; i < conv_layer_count; i++)
729 {
730 const ncnn::Layer* layer = layers[conv_layers[i]];
731
732 if (layer->type == "Convolution")
733 {
734 const ncnn::Convolution* convolution = (const ncnn::Convolution*)layer;
735
736 const int num_output = convolution->num_output;
737 const int kernel_w = convolution->kernel_w;
738 const int kernel_h = convolution->kernel_h;
739 const int dilation_w = convolution->dilation_w;
740 const int dilation_h = convolution->dilation_h;
741 const int stride_w = convolution->stride_w;
742 const int stride_h = convolution->stride_h;
743
744 const int weight_data_size_output = convolution->weight_data_size / num_output;
745
746 // int8 winograd F43 needs weight data to use 6bit quantization
747 // TODO proper condition for winograd 3x3 int8
748 bool quant_6bit = false;
749 if (kernel_w == 3 && kernel_h == 3 && dilation_w == 1 && dilation_h == 1 && stride_w == 1 && stride_h == 1)
750 quant_6bit = true;
751
752 weight_scales[i].create(num_output);
753
754 for (int n = 0; n < num_output; n++)
755 {
756 const ncnn::Mat weight_data_n = convolution->weight_data.range(weight_data_size_output * n, weight_data_size_output);
757
758 float absmax = 0.f;
759 for (int k = 0; k < weight_data_size_output; k++)
760 {
761 absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
762 }
763
764 if (quant_6bit)
765 {
766 const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output, 6);
767 weight_scales[i][n] = 31 / threshold;
768 }
769 else
770 {
771 const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
772 weight_scales[i][n] = 127 / threshold;
773 }
774 }
775 }
776
777 if (layer->type == "ConvolutionDepthWise")
778 {
779 const ncnn::ConvolutionDepthWise* convolutiondepthwise = (const ncnn::ConvolutionDepthWise*)layer;
780
781 const int group = convolutiondepthwise->group;
782 const int weight_data_size_output = convolutiondepthwise->weight_data_size / group;
783
784 std::vector<float> scales;
785
786 weight_scales[i].create(group);
787
788 for (int n = 0; n < group; n++)
789 {
790 const ncnn::Mat weight_data_n = convolutiondepthwise->weight_data.range(weight_data_size_output * n, weight_data_size_output);
791
792 float absmax = 0.f;
793 for (int k = 0; k < weight_data_size_output; k++)
794 {
795 absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
796 }
797
798 const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
799 weight_scales[i][n] = 127 / threshold;
800 }
801 }
802
803 if (layer->type == "InnerProduct")
804 {
805 const ncnn::InnerProduct* innerproduct = (const ncnn::InnerProduct*)layer;
806
807 const int num_output = innerproduct->num_output;
808 const int weight_data_size_output = innerproduct->weight_data_size / num_output;
809
810 weight_scales[i].create(num_output);
811
812 for (int n = 0; n < num_output; n++)
813 {
814 const ncnn::Mat weight_data_n = innerproduct->weight_data.range(weight_data_size_output * n, weight_data_size_output);
815
816 float absmax = 0.f;
817 for (int k = 0; k < weight_data_size_output; k++)
818 {
819 absmax = std::max(absmax, (float)fabs(weight_data_n[k]));
820 }
821
822 const float threshold = compute_aciq_gaussian_clip(absmax, weight_data_size_output);
823 weight_scales[i][n] = 127 / threshold;
824 }
825 }
826 }
827
828 // count the absmax
829 #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
830 for (int i = 0; i < image_count; i++)
831 {
832 if (i % 100 == 0)
833 {
834 fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
835 }
836
837 ncnn::Extractor ex = create_extractor();
838
839 const int thread_num = ncnn::get_omp_thread_num();
840 ex.set_blob_allocator(&blob_allocators[thread_num]);
841 ex.set_workspace_allocator(&workspace_allocators[thread_num]);
842
843 for (int j = 0; j < input_blob_count; j++)
844 {
845 const int type_to_pixel = type_to_pixels[j];
846 const std::vector<float>& mean_vals = means[j];
847 const std::vector<float>& norm_vals = norms[j];
848
849 int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
850 if (type_to_pixel != pixel_convert_type)
851 {
852 pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
853 }
854
855 ncnn::Mat in = read_and_resize_image(shapes[j], listspaths[j][i], pixel_convert_type);
856
857 in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
858
859 ex.input(input_blobs[j], in);
860 }
861
862 for (int j = 0; j < conv_bottom_blob_count; j++)
863 {
864 ncnn::Mat out;
865 ex.extract(conv_bottom_blobs[j], out);
866
867 // count absmax
868 {
869 float absmax = 0.f;
870
871 const int outc = out.c;
872 const int outsize = out.w * out.h;
873 for (int p = 0; p < outc; p++)
874 {
875 const float* ptr = out.channel(p);
876 for (int k = 0; k < outsize; k++)
877 {
878 absmax = std::max(absmax, (float)fabs(ptr[k]));
879 }
880 }
881
882 #pragma omp critical
883 {
884 QuantBlobStat& stat = quant_blob_stats[j];
885 stat.absmax = std::max(stat.absmax, absmax);
886 stat.total = outc * outsize;
887 }
888 }
889 }
890 }
891
892 // alpha gaussian
893 #pragma omp parallel for num_threads(quantize_num_threads)
894 for (int i = 0; i < conv_bottom_blob_count; i++)
895 {
896 QuantBlobStat& stat = quant_blob_stats[i];
897
898 stat.threshold = compute_aciq_gaussian_clip(stat.absmax, stat.total);
899 float scale = 127 / stat.threshold;
900
901 bottom_blob_scales[i].create(1);
902 bottom_blob_scales[i][0] = scale;
903 }
904
905 return 0;
906 }
907
cosine_similarity(const ncnn::Mat & a,const ncnn::Mat & b)908 static float cosine_similarity(const ncnn::Mat& a, const ncnn::Mat& b)
909 {
910 const int chanenls = a.c;
911 const int size = a.w * a.h;
912
913 float sa = 0;
914 float sb = 0;
915 float sum = 0;
916
917 for (int p = 0; p < chanenls; p++)
918 {
919 const float* pa = a.channel(p);
920 const float* pb = b.channel(p);
921
922 for (int i = 0; i < size; i++)
923 {
924 sa += pa[i] * pa[i];
925 sb += pb[i] * pb[i];
926 sum += pa[i] * pb[i];
927 }
928 }
929
930 float sim = (float)sum / sqrt(sa) / sqrt(sb);
931
932 return sim;
933 }
934
get_layer_param(const ncnn::Layer * layer,ncnn::ParamDict & pd)935 static int get_layer_param(const ncnn::Layer* layer, ncnn::ParamDict& pd)
936 {
937 if (layer->type == "Convolution")
938 {
939 ncnn::Convolution* convolution = (ncnn::Convolution*)layer;
940
941 pd.set(0, convolution->num_output);
942 pd.set(1, convolution->kernel_w);
943 pd.set(11, convolution->kernel_h);
944 pd.set(2, convolution->dilation_w);
945 pd.set(12, convolution->dilation_h);
946 pd.set(3, convolution->stride_w);
947 pd.set(13, convolution->stride_h);
948 pd.set(4, convolution->pad_left);
949 pd.set(15, convolution->pad_right);
950 pd.set(14, convolution->pad_top);
951 pd.set(16, convolution->pad_bottom);
952 pd.set(18, convolution->pad_value);
953 pd.set(5, convolution->bias_term);
954 pd.set(6, convolution->weight_data_size);
955 pd.set(8, convolution->int8_scale_term);
956 pd.set(9, convolution->activation_type);
957 pd.set(10, convolution->activation_params);
958 }
959 else if (layer->type == "ConvolutionDepthWise")
960 {
961 ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer;
962
963 pd.set(0, convolutiondepthwise->num_output);
964 pd.set(1, convolutiondepthwise->kernel_w);
965 pd.set(11, convolutiondepthwise->kernel_h);
966 pd.set(2, convolutiondepthwise->dilation_w);
967 pd.set(12, convolutiondepthwise->dilation_h);
968 pd.set(3, convolutiondepthwise->stride_w);
969 pd.set(13, convolutiondepthwise->stride_h);
970 pd.set(4, convolutiondepthwise->pad_left);
971 pd.set(15, convolutiondepthwise->pad_right);
972 pd.set(14, convolutiondepthwise->pad_top);
973 pd.set(16, convolutiondepthwise->pad_bottom);
974 pd.set(18, convolutiondepthwise->pad_value);
975 pd.set(5, convolutiondepthwise->bias_term);
976 pd.set(6, convolutiondepthwise->weight_data_size);
977 pd.set(7, convolutiondepthwise->group);
978 pd.set(8, convolutiondepthwise->int8_scale_term);
979 pd.set(9, convolutiondepthwise->activation_type);
980 pd.set(10, convolutiondepthwise->activation_params);
981 }
982 else if (layer->type == "InnerProduct")
983 {
984 ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer;
985
986 pd.set(0, innerproduct->num_output);
987 pd.set(1, innerproduct->bias_term);
988 pd.set(2, innerproduct->weight_data_size);
989 pd.set(8, innerproduct->int8_scale_term);
990 pd.set(9, innerproduct->activation_type);
991 pd.set(10, innerproduct->activation_params);
992 }
993 else
994 {
995 fprintf(stderr, "unexpected layer type %s in get_layer_param\n", layer->type.c_str());
996 return -1;
997 }
998
999 return 0;
1000 }
1001
get_layer_weights(const ncnn::Layer * layer,std::vector<ncnn::Mat> & weights)1002 static int get_layer_weights(const ncnn::Layer* layer, std::vector<ncnn::Mat>& weights)
1003 {
1004 if (layer->type == "Convolution")
1005 {
1006 ncnn::Convolution* convolution = (ncnn::Convolution*)layer;
1007 weights.push_back(convolution->weight_data);
1008 if (convolution->bias_term)
1009 weights.push_back(convolution->bias_data);
1010 }
1011 else if (layer->type == "ConvolutionDepthWise")
1012 {
1013 ncnn::ConvolutionDepthWise* convolutiondepthwise = (ncnn::ConvolutionDepthWise*)layer;
1014 weights.push_back(convolutiondepthwise->weight_data);
1015 if (convolutiondepthwise->bias_term)
1016 weights.push_back(convolutiondepthwise->bias_data);
1017 }
1018 else if (layer->type == "InnerProduct")
1019 {
1020 ncnn::InnerProduct* innerproduct = (ncnn::InnerProduct*)layer;
1021 weights.push_back(innerproduct->weight_data);
1022 if (innerproduct->bias_term)
1023 weights.push_back(innerproduct->bias_data);
1024 }
1025 else
1026 {
1027 fprintf(stderr, "unexpected layer type %s in get_layer_weights\n", layer->type.c_str());
1028 return -1;
1029 }
1030
1031 return 0;
1032 }
1033
quantize_EQ()1034 int QuantNet::quantize_EQ()
1035 {
1036 // find the initial scale via KL
1037 quantize_KL();
1038
1039 print_quant_info();
1040
1041 const int input_blob_count = (int)input_blobs.size();
1042 const int conv_layer_count = (int)conv_layers.size();
1043 const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
1044
1045 std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
1046 std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
1047
1048 // max 50 images for EQ
1049 const int image_count = std::min((int)listspaths[0].size(), 50);
1050
1051 const float scale_range_lower = 0.5f;
1052 const float scale_range_upper = 2.0f;
1053 const int search_steps = 100;
1054
1055 for (int i = 0; i < conv_layer_count; i++)
1056 {
1057 ncnn::Mat& weight_scale = weight_scales[i];
1058 ncnn::Mat& bottom_blob_scale = bottom_blob_scales[i];
1059
1060 const ncnn::Layer* layer = layers[conv_layers[i]];
1061
1062 // search weight scale
1063 for (int j = 0; j < weight_scale.w; j++)
1064 {
1065 const float scale = weight_scale[j];
1066 const float scale_lower = scale * scale_range_lower;
1067 const float scale_upper = scale * scale_range_upper;
1068 const float scale_step = (scale_upper - scale_lower) / search_steps;
1069
1070 std::vector<double> avgsims(search_steps, 0.0);
1071
1072 #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
1073 for (int ii = 0; ii < image_count; ii++)
1074 {
1075 if (ii % 100 == 0)
1076 {
1077 fprintf(stderr, "search weight scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, weight_scale.w, i, conv_layer_count);
1078 }
1079
1080 ncnn::Extractor ex = create_extractor();
1081
1082 const int thread_num = ncnn::get_omp_thread_num();
1083 ex.set_blob_allocator(&blob_allocators[thread_num]);
1084 ex.set_workspace_allocator(&workspace_allocators[thread_num]);
1085
1086 for (int jj = 0; jj < input_blob_count; jj++)
1087 {
1088 const int type_to_pixel = type_to_pixels[jj];
1089 const std::vector<float>& mean_vals = means[jj];
1090 const std::vector<float>& norm_vals = norms[jj];
1091
1092 int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
1093 if (type_to_pixel != pixel_convert_type)
1094 {
1095 pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
1096 }
1097
1098 ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type);
1099
1100 in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
1101
1102 ex.input(input_blobs[jj], in);
1103 }
1104
1105 ncnn::Mat in;
1106 ex.extract(conv_bottom_blobs[i], in);
1107
1108 ncnn::Mat out;
1109 ex.extract(conv_top_blobs[i], out);
1110
1111 ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex);
1112
1113 ncnn::ParamDict pd;
1114 get_layer_param(layer, pd);
1115 pd.set(8, 1); //int8_scale_term
1116 layer_int8->load_param(pd);
1117
1118 std::vector<float> sims(search_steps);
1119 for (int k = 0; k < search_steps; k++)
1120 {
1121 ncnn::Mat new_weight_scale = weight_scale.clone();
1122 new_weight_scale[j] = scale_lower + k * scale_step;
1123
1124 std::vector<ncnn::Mat> weights;
1125 get_layer_weights(layer, weights);
1126 weights.push_back(new_weight_scale);
1127 weights.push_back(bottom_blob_scale);
1128 layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data()));
1129
1130 ncnn::Option opt_int8;
1131 opt_int8.use_packing_layout = false;
1132
1133 layer_int8->create_pipeline(opt_int8);
1134
1135 ncnn::Mat out_int8;
1136 layer_int8->forward(in, out_int8, opt_int8);
1137
1138 layer_int8->destroy_pipeline(opt_int8);
1139
1140 sims[k] = cosine_similarity(out, out_int8);
1141 }
1142
1143 delete layer_int8;
1144
1145 #pragma omp critical
1146 {
1147 for (int k = 0; k < search_steps; k++)
1148 {
1149 avgsims[k] += sims[k];
1150 }
1151 }
1152 }
1153
1154 double max_avgsim = 0.0;
1155 float new_scale = scale;
1156
1157 // find the scale with min cosine distance
1158 for (int k = 0; k < search_steps; k++)
1159 {
1160 if (max_avgsim < avgsims[k])
1161 {
1162 max_avgsim = avgsims[k];
1163 new_scale = scale_lower + k * scale_step;
1164 }
1165 }
1166
1167 fprintf(stderr, "%s w %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale);
1168 weight_scale[j] = new_scale;
1169 }
1170
1171 // search bottom blob scale
1172 for (int j = 0; j < bottom_blob_scale.w; j++)
1173 {
1174 const float scale = bottom_blob_scale[j];
1175 const float scale_lower = scale * scale_range_lower;
1176 const float scale_upper = scale * scale_range_upper;
1177 const float scale_step = (scale_upper - scale_lower) / search_steps;
1178
1179 std::vector<double> avgsims(search_steps, 0.0);
1180
1181 #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
1182 for (int ii = 0; ii < image_count; ii++)
1183 {
1184 if (ii % 100 == 0)
1185 {
1186 fprintf(stderr, "search bottom blob scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, bottom_blob_scale.w, i, conv_layer_count);
1187 }
1188
1189 ncnn::Extractor ex = create_extractor();
1190
1191 const int thread_num = ncnn::get_omp_thread_num();
1192 ex.set_blob_allocator(&blob_allocators[thread_num]);
1193 ex.set_workspace_allocator(&workspace_allocators[thread_num]);
1194
1195 for (int jj = 0; jj < input_blob_count; jj++)
1196 {
1197 const int type_to_pixel = type_to_pixels[jj];
1198 const std::vector<float>& mean_vals = means[jj];
1199 const std::vector<float>& norm_vals = norms[jj];
1200
1201 int pixel_convert_type = ncnn::Mat::PIXEL_BGR;
1202 if (type_to_pixel != pixel_convert_type)
1203 {
1204 pixel_convert_type = pixel_convert_type | (type_to_pixel << ncnn::Mat::PIXEL_CONVERT_SHIFT);
1205 }
1206
1207 ncnn::Mat in = read_and_resize_image(shapes[jj], listspaths[jj][ii], pixel_convert_type);
1208
1209 in.substract_mean_normalize(mean_vals.data(), norm_vals.data());
1210
1211 ex.input(input_blobs[jj], in);
1212 }
1213
1214 ncnn::Mat in;
1215 ex.extract(conv_bottom_blobs[i], in);
1216
1217 ncnn::Mat out;
1218 ex.extract(conv_top_blobs[i], out);
1219
1220 ncnn::Layer* layer_int8 = ncnn::create_layer(layer->typeindex);
1221
1222 ncnn::ParamDict pd;
1223 get_layer_param(layer, pd);
1224 pd.set(8, 1); //int8_scale_term
1225 layer_int8->load_param(pd);
1226
1227 std::vector<float> sims(search_steps);
1228 for (int k = 0; k < search_steps; k++)
1229 {
1230 ncnn::Mat new_bottom_blob_scale = bottom_blob_scale.clone();
1231 new_bottom_blob_scale[j] = scale_lower + k * scale_step;
1232
1233 std::vector<ncnn::Mat> weights;
1234 get_layer_weights(layer, weights);
1235 weights.push_back(weight_scale);
1236 weights.push_back(new_bottom_blob_scale);
1237 layer_int8->load_model(ncnn::ModelBinFromMatArray(weights.data()));
1238
1239 ncnn::Option opt_int8;
1240 opt_int8.use_packing_layout = false;
1241
1242 layer_int8->create_pipeline(opt_int8);
1243
1244 ncnn::Mat out_int8;
1245 layer_int8->forward(in, out_int8, opt_int8);
1246
1247 layer_int8->destroy_pipeline(opt_int8);
1248
1249 sims[k] = cosine_similarity(out, out_int8);
1250 }
1251
1252 delete layer_int8;
1253
1254 #pragma omp critical
1255 {
1256 for (int k = 0; k < search_steps; k++)
1257 {
1258 avgsims[k] += sims[k];
1259 }
1260 }
1261 }
1262
1263 double max_avgsim = 0.0;
1264 float new_scale = scale;
1265
1266 // find the scale with min cosine distance
1267 for (int k = 0; k < search_steps; k++)
1268 {
1269 if (max_avgsim < avgsims[k])
1270 {
1271 max_avgsim = avgsims[k];
1272 new_scale = scale_lower + k * scale_step;
1273 }
1274 }
1275
1276 fprintf(stderr, "%s b %d = %f -> %f\n", layer->name.c_str(), j, scale, new_scale);
1277 bottom_blob_scale[j] = new_scale;
1278 }
1279
1280 // update quant info
1281 QuantBlobStat& stat = quant_blob_stats[i];
1282 stat.threshold = 127 / bottom_blob_scale[0];
1283 }
1284
1285 return 0;
1286 }
1287
// Load one path list per comma separated list file named in s.
//
// s is split in place with strtok(). Every list file is read line by line and the
// first whitespace delimited token of each line becomes one path; blank lines are
// skipped and anything after the first token is ignored. Lines and paths may have
// any length.
//
// When a list file cannot be opened the failure is logged and parsing stops, so the
// result then holds only the lists read so far.
static std::vector<std::vector<std::string> > parse_comma_path_list(char* s)
{
    std::vector<std::vector<std::string> > aps;

    // the characters sscanf("%s") treats as separators in the C locale
    static const char* const whitespace = " \t\n\v\f\r";

    for (char* pch = strtok(s, ","); pch != NULL; pch = strtok(NULL, ","))
    {
        FILE* fp = fopen(pch, "rb");
        if (!fp)
        {
            fprintf(stderr, "fopen %s failed\n", pch);
            break;
        }

        std::vector<std::string> paths;

        // one filepath per line
        std::string line;
        char chunk[1024];
        for (;;)
        {
            const bool got_chunk = fgets(chunk, sizeof(chunk), fp) != NULL;
            if (got_chunk)
            {
                line += chunk;

                // no newline yet: either a long line that needs more chunks or the
                // unterminated last line, which the failing fgets below flushes
                if (line.empty() || line[line.size() - 1] != '\n')
                    continue;
            }

            const std::string::size_type begin = line.find_first_not_of(whitespace);
            if (begin != std::string::npos)
            {
                const std::string::size_type end = line.find_first_of(whitespace, begin);
                if (end == std::string::npos)
                    paths.push_back(line.substr(begin));
                else
                    paths.push_back(line.substr(begin, end - begin));
            }
            line.clear();

            if (!got_chunk)
                break;
        }

        fclose(fp);

        aps.push_back(paths);
    }

    return aps;
}
1329
// Convert a decimal number of the form [+-]digits[.digits][(e|E)[+-]digits] to float.
//
// Parsing stops at the first character that does not fit this form, so trailing
// text is ignored and a string without any digit yields 0. The integer and the
// fraction digits are accumulated in 64-bit integers, which is enough for the at
// most 19 characters the callers pass in.
static float vstr_to_float(const char vstr[20])
{
    double v = 0.0;

    const char* p = vstr;

    // sign
    bool sign = *p != '-';
    if (*p == '+' || *p == '-')
    {
        p++;
    }

    // digits before decimal point or exponent
    // isdigit() is only defined for unsigned char values, hence the casts
    uint64_t v1 = 0;
    while (isdigit((unsigned char)*p))
    {
        v1 = v1 * 10 + (*p - '0');
        p++;
    }

    v = (double)v1;

    // digits after decimal point
    if (*p == '.')
    {
        p++;

        uint64_t pow10 = 1;
        uint64_t v2 = 0;

        while (isdigit((unsigned char)*p))
        {
            v2 = v2 * 10 + (*p - '0');
            pow10 *= 10;
            p++;
        }

        v += v2 / (double)pow10;
    }

    // exponent
    if (*p == 'e' || *p == 'E')
    {
        p++;

        // sign of exponent
        bool fact = *p != '-';
        if (*p == '+' || *p == '-')
        {
            p++;
        }

        // digits of exponent
        // stop accumulating once 400 is reached: the scale below is infinite for
        // any such exponent anyway, while an unbounded value would keep the scaling
        // loops busy for up to 10^17 iterations
        uint64_t expon = 0;
        while (isdigit((unsigned char)*p))
        {
            if (expon < 400)
            {
                expon = expon * 10 + (*p - '0');
            }
            p++;
        }

        double scale = 1.0;
        while (expon >= 8)
        {
            scale *= 1e8;
            expon -= 8;
        }
        while (expon > 0)
        {
            scale *= 10.0;
            expon -= 1;
        }

        v = fact ? v * scale : v / scale;
    }

    // fprintf(stderr, "v = %f\n", v);
    return sign ? (float)v : (float)-v;
}
1409
parse_comma_float_array_list(char * s)1410 static std::vector<std::vector<float> > parse_comma_float_array_list(char* s)
1411 {
1412 std::vector<std::vector<float> > aaf;
1413
1414 char* pch = strtok(s, "[]");
1415 while (pch != NULL)
1416 {
1417 // parse a,b,c
1418 char vstr[20];
1419 int nconsumed = 0;
1420 int nscan = sscanf(pch, "%19[^,]%n", vstr, &nconsumed);
1421 if (nscan == 1)
1422 {
1423 // ok we get array
1424 pch += nconsumed;
1425
1426 std::vector<float> af;
1427 float v = vstr_to_float(vstr);
1428 af.push_back(v);
1429
1430 nscan = sscanf(pch, ",%19[^,]%n", vstr, &nconsumed);
1431 while (nscan == 1)
1432 {
1433 pch += nconsumed;
1434
1435 float v = vstr_to_float(vstr);
1436 af.push_back(v);
1437
1438 nscan = sscanf(pch, ",%19[^,]%n", vstr, &nconsumed);
1439 }
1440
1441 // array end
1442 aaf.push_back(af);
1443 }
1444
1445 pch = strtok(NULL, "[]");
1446 }
1447
1448 return aaf;
1449 }
1450
// Parse a list of bracketed int arrays such as "[224,224,3],[0,0]".
//
// s is split in place on '[' and ']' with strtok(). A piece that starts with an
// integer becomes one array; further integers are read as long as they are
// introduced by a comma. Pieces without a leading integer, such as the "," between
// two arrays, are skipped.
static std::vector<std::vector<int> > parse_comma_int_array_list(char* s)
{
    std::vector<std::vector<int> > arrays;

    for (char* piece = strtok(s, "[]"); piece != NULL; piece = strtok(NULL, "[]"))
    {
        int value;
        int advance = 0;

        // the first value of an array has no leading comma
        if (sscanf(piece, "%d%n", &value, &advance) != 1)
            continue;

        std::vector<int> values;
        const char* cursor = piece;
        do
        {
            cursor += advance;
            values.push_back(value);
        } while (sscanf(cursor, ",%d%n", &value, &advance) == 1);

        // array end
        arrays.push_back(values);
    }

    return arrays;
}
1489
parse_comma_pixel_type_list(char * s)1490 static std::vector<int> parse_comma_pixel_type_list(char* s)
1491 {
1492 std::vector<int> aps;
1493
1494 char* pch = strtok(s, ",");
1495 while (pch != NULL)
1496 {
1497 // RAW/RGB/BGR/GRAY/RGBA/BGRA
1498 if (strcmp(pch, "RAW") == 0)
1499 aps.push_back(-233);
1500 if (strcmp(pch, "RGB") == 0)
1501 aps.push_back(ncnn::Mat::PIXEL_RGB);
1502 if (strcmp(pch, "BGR") == 0)
1503 aps.push_back(ncnn::Mat::PIXEL_BGR);
1504 if (strcmp(pch, "GRAY") == 0)
1505 aps.push_back(ncnn::Mat::PIXEL_GRAY);
1506 if (strcmp(pch, "RGBA") == 0)
1507 aps.push_back(ncnn::Mat::PIXEL_RGBA);
1508 if (strcmp(pch, "BGRA") == 0)
1509 aps.push_back(ncnn::Mat::PIXEL_BGRA);
1510
1511 pch = strtok(NULL, ",");
1512 }
1513
1514 return aps;
1515 }
1516
// Write a list of float arrays to stderr as "[a,b,c],[d,e]", each value formatted
// with %f. Nothing is written for an empty list and no newline is appended.
static void print_float_array_list(const std::vector<std::vector<float> >& list)
{
    const size_t row_count = list.size();

    for (size_t r = 0; r < row_count; r++)
    {
        // separator goes in front of every array but the first
        if (r > 0)
            fprintf(stderr, ",");

        const std::vector<float>& row = list[r];
        const size_t value_count = row.size();

        fprintf(stderr, "[");
        for (size_t c = 0; c < value_count; c++)
        {
            if (c > 0)
                fprintf(stderr, ",");
            fprintf(stderr, "%f", row[c]);
        }
        fprintf(stderr, "]");
    }
}
1534
// Write a list of int arrays to stderr as "[a,b,c],[d,e]". Nothing is written for
// an empty list and no newline is appended.
static void print_int_array_list(const std::vector<std::vector<int> >& list)
{
    const size_t row_count = list.size();

    for (size_t r = 0; r < row_count; r++)
    {
        // separator goes in front of every array but the first
        if (r > 0)
            fprintf(stderr, ",");

        const std::vector<int>& row = list[r];
        const size_t value_count = row.size();

        fprintf(stderr, "[");
        for (size_t c = 0; c < value_count; c++)
        {
            if (c > 0)
                fprintf(stderr, ",");
            fprintf(stderr, "%d", row[c]);
        }
        fprintf(stderr, "]");
    }
}
1552
print_pixel_type_list(const std::vector<int> & list)1553 static void print_pixel_type_list(const std::vector<int>& list)
1554 {
1555 for (size_t i = 0; i < list.size(); i++)
1556 {
1557 const int type = list[i];
1558 if (type == -233)
1559 fprintf(stderr, "RAW");
1560 if (type == ncnn::Mat::PIXEL_RGB)
1561 fprintf(stderr, "RGB");
1562 if (type == ncnn::Mat::PIXEL_BGR)
1563 fprintf(stderr, "BGR");
1564 if (type == ncnn::Mat::PIXEL_GRAY)
1565 fprintf(stderr, "GRAY");
1566 if (type == ncnn::Mat::PIXEL_RGBA)
1567 fprintf(stderr, "RGBA");
1568 if (type == ncnn::Mat::PIXEL_BGRA)
1569 fprintf(stderr, "BGRA");
1570 if (i != list.size() - 1)
1571 fprintf(stderr, ",");
1572 }
1573 }
1574
// Write the command line synopsis, the supported key=value options and a sample
// invocation to stderr.
static void show_usage()
{
    static const char* const usage_lines[] = {
        "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n",
        " mean=[104.0,117.0,123.0],...\n",
        " norm=[1.0,1.0,1.0],...\n",
        " shape=[224,224,3],...[w,h,c] or [w,h] **[0,0] will not resize\n",
        " pixel=RAW/RGB/BGR/GRAY/RGBA/BGRA,...\n",
        " thread=8\n",
        " method=kl/aciq/eq\n",
        "Sample usage: ncnn2table squeezenet.param squeezenet.bin imagelist.txt squeezenet.table mean=[104.0,117.0,123.0] norm=[1.0,1.0,1.0] shape=[227,227,3] pixel=BGR method=kl\n",
    };

    const size_t line_count = sizeof(usage_lines) / sizeof(usage_lines[0]);
    for (size_t i = 0; i < line_count; i++)
    {
        // the lines contain no conversion specifiers, so they are written verbatim
        fputs(usage_lines[i], stderr);
    }
}
1586
main(int argc,char ** argv)1587 int main(int argc, char** argv)
1588 {
1589 if (argc < 5)
1590 {
1591 show_usage();
1592 return -1;
1593 }
1594
1595 for (int i = 1; i < argc; i++)
1596 {
1597 if (argv[i][0] == '-')
1598 {
1599 show_usage();
1600 return -1;
1601 }
1602 }
1603
1604 const char* inparam = argv[1];
1605 const char* inbin = argv[2];
1606 char* lists = argv[3];
1607 const char* outtable = argv[4];
1608
1609 ncnn::Option opt;
1610 opt.num_threads = 1;
1611 opt.use_fp16_packed = false;
1612 opt.use_fp16_storage = false;
1613 opt.use_fp16_arithmetic = false;
1614
1615 QuantNet net;
1616 net.opt = opt;
1617 net.load_param(inparam);
1618 net.load_model(inbin);
1619
1620 net.init();
1621
1622 // load lists
1623 net.listspaths = parse_comma_path_list(lists);
1624
1625 std::string method = "kl";
1626
1627 for (int i = 5; i < argc; i++)
1628 {
1629 // key=value
1630 char* kv = argv[i];
1631
1632 char* eqs = strchr(kv, '=');
1633 if (eqs == NULL)
1634 {
1635 fprintf(stderr, "unrecognized arg %s\n", kv);
1636 continue;
1637 }
1638
1639 // split k v
1640 eqs[0] = '\0';
1641 const char* key = kv;
1642 char* value = eqs + 1;
1643
1644 // load mean norm shape
1645 if (memcmp(key, "mean", 4) == 0)
1646 net.means = parse_comma_float_array_list(value);
1647 if (memcmp(key, "norm", 4) == 0)
1648 net.norms = parse_comma_float_array_list(value);
1649 if (memcmp(key, "shape", 5) == 0)
1650 net.shapes = parse_comma_int_array_list(value);
1651 if (memcmp(key, "pixel", 5) == 0)
1652 net.type_to_pixels = parse_comma_pixel_type_list(value);
1653 if (memcmp(key, "thread", 6) == 0)
1654 net.quantize_num_threads = atoi(value);
1655 if (memcmp(key, "method", 6) == 0)
1656 method = std::string(value);
1657 }
1658
1659 // sanity check
1660 const size_t input_blob_count = net.input_blobs.size();
1661 if (net.listspaths.size() != input_blob_count)
1662 {
1663 fprintf(stderr, "expect %d lists, but got %d\n", (int)input_blob_count, (int)net.listspaths.size());
1664 return -1;
1665 }
1666 if (net.means.size() != input_blob_count)
1667 {
1668 fprintf(stderr, "expect %d means, but got %d\n", (int)input_blob_count, (int)net.means.size());
1669 return -1;
1670 }
1671 if (net.norms.size() != input_blob_count)
1672 {
1673 fprintf(stderr, "expect %d norms, but got %d\n", (int)input_blob_count, (int)net.norms.size());
1674 return -1;
1675 }
1676 if (net.shapes.size() != input_blob_count)
1677 {
1678 fprintf(stderr, "expect %d shapes, but got %d\n", (int)input_blob_count, (int)net.shapes.size());
1679 return -1;
1680 }
1681 if (net.type_to_pixels.size() != input_blob_count)
1682 {
1683 fprintf(stderr, "expect %d pixels, but got %d\n", (int)input_blob_count, (int)net.type_to_pixels.size());
1684 return -1;
1685 }
1686 if (net.quantize_num_threads < 0)
1687 {
1688 fprintf(stderr, "malformed thread %d\n", net.quantize_num_threads);
1689 return -1;
1690 }
1691
1692 // print quantnet config
1693 {
1694 fprintf(stderr, "mean = ");
1695 print_float_array_list(net.means);
1696 fprintf(stderr, "\n");
1697 fprintf(stderr, "norm = ");
1698 print_float_array_list(net.norms);
1699 fprintf(stderr, "\n");
1700 fprintf(stderr, "shape = ");
1701 print_int_array_list(net.shapes);
1702 fprintf(stderr, "\n");
1703 fprintf(stderr, "pixel = ");
1704 print_pixel_type_list(net.type_to_pixels);
1705 fprintf(stderr, "\n");
1706 fprintf(stderr, "thread = %d\n", net.quantize_num_threads);
1707 fprintf(stderr, "method = %s\n", method.c_str());
1708 fprintf(stderr, "---------------------------------------\n");
1709 }
1710
1711 if (method == "kl")
1712 {
1713 net.quantize_KL();
1714 }
1715 else if (method == "aciq")
1716 {
1717 net.quantize_ACIQ();
1718 }
1719 else if (method == "eq")
1720 {
1721 net.quantize_EQ();
1722 }
1723 else
1724 {
1725 fprintf(stderr, "not implemented yet !\n");
1726 fprintf(stderr, "unknown method %s, expect kl / aciq / eq\n", method.c_str());
1727 return -1;
1728 }
1729
1730 net.print_quant_info();
1731
1732 net.save_table(outtable);
1733
1734 return 0;
1735 }
1736