1 // Tencent is pleased to support the open source community by making ncnn available.
2 //
3 // Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
4 //
5 // Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // https://opensource.org/licenses/BSD-3-Clause
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14
15 #ifdef _MSC_VER
16 #define _CRT_SECURE_NO_DEPRECATE
17 #endif
18
#include "caffe.pb.h"

#include <algorithm>
#include <assert.h>
#include <fstream>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/message.h>
#include <google/protobuf/text_format.h>
#include <limits.h>
#include <limits>
#include <map>
#include <math.h>
#include <set>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <vector>
33
// Round sz up to the next multiple of n.
// n must be a power of two; the result equals sz when sz is already aligned.
static inline size_t alignSize(size_t sz, int n)
{
    const size_t mask = (size_t)n - 1;
    return (sz + mask) & ~mask;
}
38
39 // convert float to half precision floating point
// Convert an IEEE-754 single precision float (1 sign : 8 exponent : 23
// significand bits) to half precision (1 : 5 : 10).
// The significand is truncated, not rounded; overflow saturates to infinity
// and values too small for a half denormal flush to signed zero.
static unsigned short float2half(float value)
{
    union
    {
        unsigned int u;
        float f;
    } bits;

    bits.f = value;

    const unsigned short sign = (unsigned short)((bits.u >> 31) & 0x1);
    const unsigned short exp8 = (unsigned short)((bits.u >> 23) & 0xFF);
    const unsigned int frac23 = bits.u & 0x7FFFFF;

    const unsigned short sign15 = (unsigned short)(sign << 15);

    if (exp8 == 0)
    {
        // fp32 zero or denormal: always underflows to signed zero
        return sign15;
    }

    if (exp8 == 0xFF)
    {
        // infinity keeps a zero payload, NaN gets a quiet-NaN payload bit
        return (unsigned short)(sign15 | (0x1F << 10) | (frac23 ? 0x200 : 0x00));
    }

    // rebias the exponent from 127 (fp32) to 15 (fp16)
    const short exp5 = (short)(exp8 + (-127 + 15));

    if (exp5 >= 31)
    {
        // magnitude too large for half: saturate to infinity
        return (unsigned short)(sign15 | (0x1F << 10));
    }

    if (exp5 <= 0)
    {
        if (exp5 >= -10)
        {
            // representable as a half denormal: shift in the implicit leading 1
            const unsigned short frac10 = (unsigned short)((frac23 | 0x800000) >> (14 - exp5));
            return (unsigned short)(sign15 | frac10);
        }

        // too small even for a denormal: flush to signed zero
        return sign15;
    }

    // normalized half: drop the low 13 significand bits
    return (unsigned short)(sign15 | (exp5 << 10) | (frac23 >> 13));
}
102
103 // round to nearest
// Round to nearest integer (ties away from zero) and saturate symmetrically
// to [-127, 127]; -128 is deliberately never produced.
static signed char float2int8(float value)
{
    const float rounded = (value >= 0.f) ? (value + 0.5f) : (value - 0.5f);

    if (rounded > 127)
        return 127;

    if (rounded < -127)
        return -127;

    return static_cast<signed char>(rounded);
}
119
// Store a finished (key, scales) pair: keys following the "XYZ_param_N"
// pattern are weight scales, everything else is a blob (activation) scale.
static void store_int8scale_entry(const std::string& keystr, const std::vector<float>& scales, std::map<std::string, std::vector<float> >& blob_int8scale_table, std::map<std::string, std::vector<float> >& weight_int8scale_table)
{
    if (strstr(keystr.c_str(), "_param_"))
    {
        weight_int8scale_table[keystr] = scales;
    }
    else
    {
        blob_int8scale_table[keystr] = scales;
    }
}

// Parse an int8 calibration table file.
// The file is a whitespace-separated token stream: a non-numeric token starts
// a new key and the numeric tokens that follow are its scale values.
// Both output tables are cleared first.
// Returns false only when the file cannot be opened.
static bool read_int8scale_table(const char* filepath, std::map<std::string, std::vector<float> >& blob_int8scale_table, std::map<std::string, std::vector<float> >& weight_int8scale_table)
{
    blob_int8scale_table.clear();
    weight_int8scale_table.clear();

    FILE* fp = fopen(filepath, "rb");
    if (!fp)
    {
        fprintf(stderr, "fopen %s failed\n", filepath);
        return false;
    }

    bool in_scale_vector = false;

    std::string keystr;
    std::vector<float> scales;

    for (;;)
    {
        char key[256];
        // loop on the read result instead of feof(), which only reports EOF
        // after a failed read
        int nscan = fscanf(fp, "%255s", key);
        if (nscan != 1)
        {
            break;
        }

        if (in_scale_vector)
        {
            float scale = 1.f;
            if (sscanf(key, "%f", &scale) == 1)
            {
                scales.push_back(scale);
                continue;
            }

            // a non-numeric token terminates the current scale vector
            store_int8scale_entry(keystr, scales, blob_int8scale_table, weight_int8scale_table);

            keystr.clear();
            scales.clear();

            in_scale_vector = false;
        }

        // the token is a new key
        keystr = key;
        in_scale_vector = true;
    }

    // flush the trailing entry at end of file
    if (in_scale_vector)
    {
        store_int8scale_entry(keystr, scales, blob_int8scale_table, weight_int8scale_table);
    }

    fclose(fp);

    return true;
}
199
quantize_weight(float * data,size_t data_length,std::vector<unsigned short> & float16_weights)200 static int quantize_weight(float* data, size_t data_length, std::vector<unsigned short>& float16_weights)
201 {
202 float16_weights.resize(data_length);
203
204 for (size_t i = 0; i < data_length; i++)
205 {
206 float f = data[i];
207
208 unsigned short fp16 = float2half(f);
209
210 float16_weights[i] = fp16;
211 }
212
213 // magic tag for half-precision floating point
214 return 0x01306B47;
215 }
216
quantize_weight(float * data,size_t data_length,std::vector<float> scales,std::vector<signed char> & int8_weights)217 static int quantize_weight(float* data, size_t data_length, std::vector<float> scales, std::vector<signed char>& int8_weights)
218 {
219 int8_weights.resize(data_length);
220
221 const int length_per_group = static_cast<int>(data_length / scales.size());
222
223 for (size_t i = 0; i < data_length; i++)
224 {
225 float f = data[i];
226
227 signed char int8 = float2int8(f * scales[i / length_per_group]);
228
229 int8_weights[i] = int8;
230 }
231
232 // magic tag for int8
233 return 0x000D4B38;
234 }
235
// Quantize a float32 weight array with a uniform quantization table of
// quantize_level entries spanning [min, max] of the data.
// Outputs the table values in quantize_table and, for every input value,
// the index of its nearest table entry in quantize_index.
// Returns false when data_length < quantize_level (quantization pointless).
static bool quantize_weight(float* data, size_t data_length, int quantize_level, std::vector<float>& quantize_table, std::vector<unsigned char>& quantize_index)
{
    assert(quantize_level != 0);
    assert(data != NULL);
    assert(data_length > 0);

    if (data_length < static_cast<size_t>(quantize_level))
    {
        fprintf(stderr, "No need quantize,because: data_length < quantize_level");
        return false;
    }

    quantize_table.reserve(quantize_level);
    quantize_index.reserve(data_length);

    // 1. Find min and max value
    // NOTE: numeric_limits<float>::min() is the smallest POSITIVE float and
    // must not seed the running maximum -- that broke all-negative inputs.
    // -max() is the true most-negative finite value.
    float max_value = -std::numeric_limits<float>::max();
    float min_value = std::numeric_limits<float>::max();

    for (size_t i = 0; i < data_length; ++i)
    {
        if (max_value < data[i]) max_value = data[i];
        if (min_value > data[i]) min_value = data[i];
    }
    float strides = (max_value - min_value) / quantize_level;

    // 2. Generate quantize table
    for (int i = 0; i < quantize_level; ++i)
    {
        quantize_table.push_back(min_value + i * strides);
    }

    // all values identical: avoid dividing by a zero stride below
    if (strides == 0.f)
    {
        quantize_index.assign(data_length, 0);
        return true;
    }

    // 3. Align data to the quantized value
    for (size_t i = 0; i < data_length; ++i)
    {
        int table_index = int((data[i] - min_value) / strides);
        table_index = std::min(table_index, quantize_level - 1);

        float low_value = quantize_table[table_index];
        float high_value = low_value + strides;

        // snap to whichever bucket boundary is nearer
        const float targetValue = data[i] - low_value < high_value - data[i] ? low_value : high_value;

        table_index = int((targetValue - min_value) / strides);
        table_index = std::min(table_index, quantize_level - 1);
        quantize_index.push_back(table_index);
    }

    return true;
}
287
read_proto_from_text(const char * filepath,google::protobuf::Message * message)288 static bool read_proto_from_text(const char* filepath, google::protobuf::Message* message)
289 {
290 std::ifstream fs(filepath, std::ifstream::in);
291 if (!fs.is_open())
292 {
293 fprintf(stderr, "open failed %s\n", filepath);
294 return false;
295 }
296
297 google::protobuf::io::IstreamInputStream input(&fs);
298 bool success = google::protobuf::TextFormat::Parse(&input, message);
299
300 fs.close();
301
302 return success;
303 }
304
read_proto_from_binary(const char * filepath,google::protobuf::Message * message)305 static bool read_proto_from_binary(const char* filepath, google::protobuf::Message* message)
306 {
307 std::ifstream fs(filepath, std::ifstream::in | std::ifstream::binary);
308 if (!fs.is_open())
309 {
310 fprintf(stderr, "open failed %s\n", filepath);
311 return false;
312 }
313
314 google::protobuf::io::IstreamInputStream input(&fs);
315 google::protobuf::io::CodedInputStream codedstr(&input);
316
317 #if GOOGLE_PROTOBUF_VERSION >= 3011000
318 codedstr.SetTotalBytesLimit(INT_MAX);
319 #else
320 codedstr.SetTotalBytesLimit(INT_MAX, INT_MAX / 2);
321 #endif
322
323 bool success = message->ParseFromCodedStream(&codedstr);
324
325 fs.close();
326
327 return success;
328 }
329
main(int argc,char ** argv)330 int main(int argc, char** argv)
331 {
332 if (!(argc == 3 || argc == 5 || argc == 6 || argc == 7))
333 {
334 fprintf(stderr, "Usage: %s [caffeproto] [caffemodel] [ncnnproto] [ncnnbin] [quantizelevel] [int8scaletable]\n", argv[0]);
335 return -1;
336 }
337
338 const char* caffeproto = argv[1];
339 const char* caffemodel = argv[2];
340 const char* ncnn_prototxt = argc >= 5 ? argv[3] : "ncnn.proto";
341 const char* ncnn_modelbin = argc >= 5 ? argv[4] : "ncnn.bin";
342 const char* quantize_param = argc >= 6 ? argv[5] : "0";
343 const char* int8scale_table_path = argc == 7 ? argv[6] : NULL;
344 int quantize_level = atoi(quantize_param);
345
346 if (quantize_level != 0 && quantize_level != 256 && quantize_level != 65536)
347 {
348 fprintf(stderr, "%s: only support quantize level = 0, 256, or 65536", argv[0]);
349 return -1;
350 }
351
352 caffe::NetParameter proto;
353 caffe::NetParameter net;
354
355 // load
356 bool s0 = read_proto_from_text(caffeproto, &proto);
357 if (!s0)
358 {
359 fprintf(stderr, "read_proto_from_text failed\n");
360 return -1;
361 }
362
363 bool s1 = read_proto_from_binary(caffemodel, &net);
364 if (!s1)
365 {
366 fprintf(stderr, "read_proto_from_binary failed\n");
367 return -1;
368 }
369
370 std::map<std::string, std::vector<float> > blob_int8scale_table;
371 std::map<std::string, std::vector<float> > weight_int8scale_table;
372 if (int8scale_table_path)
373 {
374 bool s2 = read_int8scale_table(int8scale_table_path, blob_int8scale_table, weight_int8scale_table);
375 if (!s2)
376 {
377 fprintf(stderr, "read_int8scale_table failed\n");
378 return -1;
379 }
380 }
381
382 FILE* pp = fopen(ncnn_prototxt, "wb");
383 FILE* bp = fopen(ncnn_modelbin, "wb");
384
385 // magic
386 fprintf(pp, "7767517\n");
387
388 // rename mapping for identical bottom top style
389 std::map<std::string, std::string> blob_name_decorated;
390
391 // bottom blob reference
392 std::map<std::string, int> bottom_reference;
393
394 // global definition line
395 // [layer count] [blob count]
396 int layer_count = proto.layer_size();
397 std::set<std::string> blob_names;
398 for (int i = 0; i < layer_count; i++)
399 {
400 const caffe::LayerParameter& layer = proto.layer(i);
401
402 for (int j = 0; j < layer.bottom_size(); j++)
403 {
404 std::string blob_name = layer.bottom(j);
405 if (blob_name_decorated.find(blob_name) != blob_name_decorated.end())
406 {
407 blob_name = blob_name_decorated[blob_name];
408 }
409
410 blob_names.insert(blob_name);
411
412 if (bottom_reference.find(blob_name) == bottom_reference.end())
413 {
414 bottom_reference[blob_name] = 1;
415 }
416 else
417 {
418 bottom_reference[blob_name] = bottom_reference[blob_name] + 1;
419 }
420 }
421
422 if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
423 {
424 std::string blob_name = layer.top(0) + "_" + layer.name();
425 blob_name_decorated[layer.top(0)] = blob_name;
426 blob_names.insert(blob_name);
427 }
428 else
429 {
430 for (int j = 0; j < layer.top_size(); j++)
431 {
432 std::string blob_name = layer.top(j);
433 blob_names.insert(blob_name);
434 }
435 }
436 }
437 // remove bottom_reference entry with reference equals to one
438 int splitncnn_blob_count = 0;
439 std::map<std::string, int>::iterator it = bottom_reference.begin();
440 while (it != bottom_reference.end())
441 {
442 if (it->second == 1)
443 {
444 bottom_reference.erase(it++);
445 }
446 else
447 {
448 splitncnn_blob_count += it->second;
449 // fprintf(stderr, "%s %d\n", it->first.c_str(), it->second);
450 ++it;
451 }
452 }
453 fprintf(pp, "%d %d\n", int(layer_count + bottom_reference.size()), int(blob_names.size() + splitncnn_blob_count));
454
455 // populate
456 blob_name_decorated.clear();
457 int internal_split = 0;
458 for (int i = 0; i < layer_count; i++)
459 {
460 const caffe::LayerParameter& layer = proto.layer(i);
461
462 // layer definition line, repeated
463 // [type] [name] [bottom blob count] [top blob count] [bottom blobs] [top blobs] [layer specific params]
464 if (layer.type() == "BN")
465 {
466 fprintf(pp, "%-16s", "Scale");
467 }
468 else if (layer.type() == "Convolution")
469 {
470 const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
471 if (convolution_param.group() != 1)
472 fprintf(pp, "%-16s", "ConvolutionDepthWise");
473 else
474 fprintf(pp, "%-16s", "Convolution");
475 }
476 else if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
477 {
478 fprintf(pp, "%-16s", "ConvolutionDepthWise");
479 }
480 else if (layer.type() == "Deconvolution")
481 {
482 const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
483 if (convolution_param.group() != 1)
484 fprintf(pp, "%-16s", "DeconvolutionDepthWise");
485 else
486 fprintf(pp, "%-16s", "Deconvolution");
487 }
488 else if (layer.type() == "MemoryData")
489 {
490 fprintf(pp, "%-16s", "Input");
491 }
492 else if (layer.type() == "Python")
493 {
494 const caffe::PythonParameter& python_param = layer.python_param();
495 std::string python_layer_name = python_param.layer();
496 if (python_layer_name == "ProposalLayer")
497 fprintf(pp, "%-16s", "Proposal");
498 else
499 fprintf(pp, "%-16s", python_layer_name.c_str());
500 }
501 else if (layer.type() == "ReLU6")
502 {
503 fprintf(pp, "%-16s", "Clip");
504 }
505 else if (layer.type() == "Silence")
506 {
507 fprintf(pp, "%-16s", "Noop");
508 }
509 else
510 {
511 fprintf(pp, "%-16s", layer.type().c_str());
512 }
513 fprintf(pp, " %-16s %d %d", layer.name().c_str(), layer.bottom_size(), layer.top_size());
514
515 for (int j = 0; j < layer.bottom_size(); j++)
516 {
517 std::string blob_name = layer.bottom(j);
518 if (blob_name_decorated.find(layer.bottom(j)) != blob_name_decorated.end())
519 {
520 blob_name = blob_name_decorated[layer.bottom(j)];
521 }
522
523 if (bottom_reference.find(blob_name) != bottom_reference.end())
524 {
525 int refidx = bottom_reference[blob_name] - 1;
526 bottom_reference[blob_name] = refidx;
527
528 char splitsuffix[256];
529 sprintf(splitsuffix, "_splitncnn_%d", refidx);
530 blob_name = blob_name + splitsuffix;
531 }
532
533 fprintf(pp, " %s", blob_name.c_str());
534 }
535
536 // decorated
537 if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
538 {
539 std::string blob_name = layer.top(0) + "_" + layer.name();
540 blob_name_decorated[layer.top(0)] = blob_name;
541
542 fprintf(pp, " %s", blob_name.c_str());
543 }
544 else
545 {
546 for (int j = 0; j < layer.top_size(); j++)
547 {
548 std::string blob_name = layer.top(j);
549 fprintf(pp, " %s", blob_name.c_str());
550 }
551 }
552
553 // find blob binary by layer name
554 int netidx;
555 for (netidx = 0; netidx < net.layer_size(); netidx++)
556 {
557 if (net.layer(netidx).name() == layer.name())
558 {
559 break;
560 }
561 }
562
563 // layer specific params
564 if (layer.type() == "BatchNorm")
565 {
566 const caffe::LayerParameter& binlayer = net.layer(netidx);
567
568 const caffe::BlobProto& mean_blob = binlayer.blobs(0);
569 const caffe::BlobProto& var_blob = binlayer.blobs(1);
570 fprintf(pp, " 0=%d", (int)mean_blob.data_size());
571
572 const caffe::BatchNormParameter& batch_norm_param = layer.batch_norm_param();
573 float eps = batch_norm_param.eps();
574
575 std::vector<float> ones(mean_blob.data_size(), 1.f);
576 fwrite(ones.data(), sizeof(float), ones.size(), bp); // slope
577
578 if (binlayer.blobs_size() < 3)
579 {
580 fwrite(mean_blob.data().data(), sizeof(float), mean_blob.data_size(), bp);
581 float tmp;
582 for (int j = 0; j < var_blob.data_size(); j++)
583 {
584 tmp = var_blob.data().data()[j] + eps;
585 fwrite(&tmp, sizeof(float), 1, bp);
586 }
587 }
588 else
589 {
590 float scale_factor = binlayer.blobs(2).data().data()[0] == 0 ? 0 : 1 / binlayer.blobs(2).data().data()[0];
591 // premultiply scale_factor to mean and variance
592 float tmp;
593 for (int j = 0; j < mean_blob.data_size(); j++)
594 {
595 tmp = mean_blob.data().data()[j] * scale_factor;
596 fwrite(&tmp, sizeof(float), 1, bp);
597 }
598 for (int j = 0; j < var_blob.data_size(); j++)
599 {
600 tmp = var_blob.data().data()[j] * scale_factor + eps;
601 fwrite(&tmp, sizeof(float), 1, bp);
602 }
603 }
604
605 std::vector<float> zeros(mean_blob.data_size(), 0.f);
606 fwrite(zeros.data(), sizeof(float), zeros.size(), bp); // bias
607 }
608 else if (layer.type() == "BN")
609 {
610 const caffe::LayerParameter& binlayer = net.layer(netidx);
611
612 const caffe::BlobProto& scale_blob = binlayer.blobs(0);
613 const caffe::BlobProto& shift_blob = binlayer.blobs(1);
614 fprintf(pp, " 0=%d", (int)scale_blob.data_size());
615 fprintf(pp, " 1=1");
616
617 fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
618 fwrite(shift_blob.data().data(), sizeof(float), shift_blob.data_size(), bp);
619 }
620 else if (layer.type() == "Concat")
621 {
622 const caffe::ConcatParameter& concat_param = layer.concat_param();
623 int axis = concat_param.axis() - 1;
624 fprintf(pp, " 0=%d", axis);
625 }
626 else if (layer.type() == "Convolution" || layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
627 {
628 const caffe::LayerParameter& binlayer = net.layer(netidx);
629
630 const caffe::BlobProto& weight_blob = binlayer.blobs(0);
631 const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
632 fprintf(pp, " 0=%d", convolution_param.num_output());
633 if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
634 {
635 fprintf(pp, " 1=%d", convolution_param.kernel_w());
636 fprintf(pp, " 11=%d", convolution_param.kernel_h());
637 }
638 else
639 {
640 fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
641 }
642 fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
643 if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
644 {
645 fprintf(pp, " 3=%d", convolution_param.stride_w());
646 fprintf(pp, " 13=%d", convolution_param.stride_h());
647 }
648 else
649 {
650 fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
651 }
652 if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
653 {
654 fprintf(pp, " 4=%d", convolution_param.pad_w());
655 fprintf(pp, " 14=%d", convolution_param.pad_h());
656 }
657 else
658 {
659 fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
660 }
661 fprintf(pp, " 5=%d", convolution_param.bias_term());
662 fprintf(pp, " 6=%d", weight_blob.data_size());
663
664 int num_group = 1;
665 if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
666 {
667 num_group = convolution_param.num_output();
668 }
669 else
670 {
671 num_group = convolution_param.group();
672 }
673
674 if (num_group != 1)
675 {
676 fprintf(pp, " 7=%d", num_group);
677 }
678
679 bool int8_scale_term = false;
680 std::vector<float> weight_int8scale;
681 std::vector<float> blob_int8scale;
682
683 if (int8scale_table_path)
684 {
685 char key[256];
686 sprintf(key, "%s_param_0", layer.name().c_str());
687 if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
688 {
689 weight_int8scale = weight_int8scale_table[std::string(key)];
690 }
691
692 if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
693 {
694 blob_int8scale = blob_int8scale_table[layer.name()];
695 }
696
697 int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();
698
699 if (int8_scale_term)
700 {
701 if ((int)weight_int8scale.size() == num_group)
702 {
703 fprintf(pp, " 8=1");
704 }
705 else
706 {
707 fprintf(pp, " 8=2");
708 }
709 }
710 }
711
712 for (int j = 0; j < binlayer.blobs_size(); j++)
713 {
714 int quantize_tag = 0;
715 const caffe::BlobProto& blob = binlayer.blobs(j);
716
717 std::vector<float> quantize_table;
718 std::vector<unsigned char> quantize_index;
719
720 std::vector<unsigned short> float16_weights;
721 std::vector<signed char> int8_weights;
722
723 // we will not quantize the bias values
724 if (j == 0)
725 {
726 if (int8_scale_term)
727 {
728 if (quantize_level == 0)
729 {
730 quantize_tag = 0x0002C056;
731 }
732 else if (quantize_level == 256)
733 {
734 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
735 }
736 }
737 else if (quantize_level == 256)
738 {
739 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
740 }
741 else if (quantize_level == 65536)
742 {
743 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
744 }
745
746 // write quantize tag first
747 fwrite(&quantize_tag, sizeof(int), 1, bp);
748
749 if (quantize_tag)
750 {
751 int p0 = ftell(bp);
752 if (int8_scale_term)
753 {
754 if (quantize_level == 0)
755 {
756 // write original data and int8scale
757 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
758 }
759 else if (quantize_level == 256)
760 {
761 fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
762 }
763 }
764 else if (quantize_level == 256)
765 {
766 // write quantize table and index
767 fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
768 fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
769 }
770 else if (quantize_level == 65536)
771 {
772 fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
773 }
774
775 // padding to 32bit align
776 int nwrite = ftell(bp) - p0;
777 int nalign = int(alignSize(nwrite, 4));
778 unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
779 fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
780 }
781 else
782 {
783 // write original data
784 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
785 }
786 }
787 else
788 {
789 // write original data
790 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
791 }
792 }
793
794 if (int8_scale_term)
795 {
796 // write int8_scale data
797 fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
798 fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
799 }
800 }
801 else if (layer.type() == "Crop")
802 {
803 const caffe::CropParameter& crop_param = layer.crop_param();
804 int num_offset = crop_param.offset_size();
805 if (num_offset == 1)
806 {
807 int offset = crop_param.offset(0);
808 int axis = crop_param.axis() - 1;
809 if (axis == 0)
810 {
811 fprintf(pp, " 0=%d", offset);
812 fprintf(pp, " 1=%d", offset);
813 fprintf(pp, " 2=%d", offset);
814 }
815 else if (axis == 1)
816 {
817 fprintf(pp, " 0=%d", offset);
818 fprintf(pp, " 1=%d", offset);
819 }
820 else if (axis == 2)
821 {
822 fprintf(pp, " 0=%d", offset);
823 }
824 }
825 else if (num_offset == 2)
826 {
827 int woffset = crop_param.offset(1);
828 int hoffset = crop_param.offset(0);
829 fprintf(pp, " 0=%d", woffset);
830 fprintf(pp, " 1=%d", hoffset);
831 }
832 else if (num_offset == 3)
833 {
834 int woffset = crop_param.offset(2);
835 int hoffset = crop_param.offset(1);
836 int coffset = crop_param.offset(0);
837 fprintf(pp, " 0=%d", woffset);
838 fprintf(pp, " 1=%d", hoffset);
839 fprintf(pp, " 2=%d", coffset);
840 }
841 }
842 else if (layer.type() == "Deconvolution")
843 {
844 const caffe::LayerParameter& binlayer = net.layer(netidx);
845
846 const caffe::BlobProto& weight_blob = binlayer.blobs(0);
847 const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
848 fprintf(pp, " 0=%d", convolution_param.num_output());
849 if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
850 {
851 fprintf(pp, " 1=%d", convolution_param.kernel_w());
852 fprintf(pp, " 11=%d", convolution_param.kernel_h());
853 }
854 else
855 {
856 fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
857 }
858 fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
859 if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
860 {
861 fprintf(pp, " 3=%d", convolution_param.stride_w());
862 fprintf(pp, " 13=%d", convolution_param.stride_h());
863 }
864 else
865 {
866 fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
867 }
868 if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
869 {
870 fprintf(pp, " 4=%d", convolution_param.pad_w());
871 fprintf(pp, " 14=%d", convolution_param.pad_h());
872 }
873 else
874 {
875 fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
876 }
877 fprintf(pp, " 5=%d", convolution_param.bias_term());
878 fprintf(pp, " 6=%d", weight_blob.data_size());
879
880 int group = convolution_param.group();
881 if (group != 1)
882 {
883 fprintf(pp, " 7=%d", group);
884 }
885
886 int quantized_weight = 0;
887 fwrite(&quantized_weight, sizeof(int), 1, bp);
888
889 int maxk = 0;
890 if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
891 {
892 maxk = convolution_param.kernel_w() * convolution_param.kernel_h();
893 }
894 else
895 {
896 maxk = convolution_param.kernel_size(0) * convolution_param.kernel_size(0);
897 }
898 for (int g = 0; g < group; g++)
899 {
900 // reorder weight from inch-outch to outch-inch
901 int num_output = convolution_param.num_output() / group;
902 int num_input = weight_blob.data_size() / maxk / num_output / group;
903 const float* weight_data_ptr = weight_blob.data().data() + g * maxk * num_output * num_input;
904 for (int k = 0; k < num_output; k++)
905 {
906 for (int j = 0; j < num_input; j++)
907 {
908 fwrite(weight_data_ptr + (j * num_output + k) * maxk, sizeof(float), maxk, bp);
909 }
910 }
911 }
912
913 for (int j = 1; j < binlayer.blobs_size(); j++)
914 {
915 const caffe::BlobProto& blob = binlayer.blobs(j);
916 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
917 }
918 }
919 else if (layer.type() == "DetectionOutput")
920 {
921 const caffe::DetectionOutputParameter& detection_output_param = layer.detection_output_param();
922 const caffe::NonMaximumSuppressionParameter& nms_param = detection_output_param.nms_param();
923 fprintf(pp, " 0=%d", detection_output_param.num_classes());
924 fprintf(pp, " 1=%e", nms_param.nms_threshold());
925 fprintf(pp, " 2=%d", nms_param.top_k());
926 fprintf(pp, " 3=%d", detection_output_param.keep_top_k());
927 fprintf(pp, " 4=%e", detection_output_param.confidence_threshold());
928 }
929 else if (layer.type() == "Dropout")
930 {
931 const caffe::DropoutParameter& dropout_param = layer.dropout_param();
932 if (dropout_param.has_scale_train() && !dropout_param.scale_train())
933 {
934 float scale = 1.f - dropout_param.dropout_ratio();
935 fprintf(pp, " 0=%e", scale);
936 }
937 }
938 else if (layer.type() == "Eltwise")
939 {
940 const caffe::EltwiseParameter& eltwise_param = layer.eltwise_param();
941 int coeff_size = eltwise_param.coeff_size();
942 fprintf(pp, " 0=%d", (int)eltwise_param.operation());
943 fprintf(pp, " -23301=%d", coeff_size);
944 for (int j = 0; j < coeff_size; j++)
945 {
946 fprintf(pp, ",%e", eltwise_param.coeff(j));
947 }
948 }
949 else if (layer.type() == "ELU")
950 {
951 const caffe::ELUParameter& elu_param = layer.elu_param();
952 fprintf(pp, " 0=%e", elu_param.alpha());
953 }
954 else if (layer.type() == "Embed")
955 {
956 const caffe::LayerParameter& binlayer = net.layer(netidx);
957
958 const caffe::BlobProto& weight_blob = binlayer.blobs(0);
959 const caffe::EmbedParameter& embed_param = layer.embed_param();
960 fprintf(pp, " 0=%d", embed_param.num_output());
961 fprintf(pp, " 1=%d", embed_param.input_dim());
962 fprintf(pp, " 2=%d", embed_param.bias_term());
963 fprintf(pp, " 3=%d", weight_blob.data_size());
964
965 for (int j = 0; j < binlayer.blobs_size(); j++)
966 {
967 int quantize_tag = 0;
968 const caffe::BlobProto& blob = binlayer.blobs(j);
969
970 std::vector<float> quantize_table;
971 std::vector<unsigned char> quantize_index;
972
973 std::vector<unsigned short> float16_weights;
974
975 // we will not quantize the bias values
976 if (j == 0 && quantize_level != 0)
977 {
978 if (quantize_level == 256)
979 {
980 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
981 }
982 else if (quantize_level == 65536)
983 {
984 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
985 }
986 }
987
988 // write quantize tag first
989 if (j == 0)
990 fwrite(&quantize_tag, sizeof(int), 1, bp);
991
992 if (quantize_tag)
993 {
994 int p0 = ftell(bp);
995 if (quantize_level == 256)
996 {
997 // write quantize table and index
998 fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
999 fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
1000 }
1001 else if (quantize_level == 65536)
1002 {
1003 fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
1004 }
1005 // padding to 32bit align
1006 int nwrite = ftell(bp) - p0;
1007 int nalign = int(alignSize(nwrite, 4));
1008 unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
1009 fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
1010 }
1011 else
1012 {
1013 // write original data
1014 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
1015 }
1016 }
1017 }
1018 else if (layer.type() == "InnerProduct")
1019 {
// binlayer is the layer at the same index in the caffemodel net; it carries
// the trained blobs (blob 0 = weights, blob 1 = bias when bias_term is set).
1020 const caffe::LayerParameter& binlayer = net.layer(netidx);
1021
1022 const caffe::BlobProto& weight_blob = binlayer.blobs(0);
1023 const caffe::InnerProductParameter& inner_product_param = layer.inner_product_param();
// param file entries: 0=num_output, 1=bias_term, 2=weight element count
1024 fprintf(pp, " 0=%d", inner_product_param.num_output());
1025 fprintf(pp, " 1=%d", inner_product_param.bias_term());
1026 fprintf(pp, " 2=%d", weight_blob.data_size());
1027
1028 bool int8_scale_term = false;
1029 std::vector<float> weight_int8scale;
1030 std::vector<float> blob_int8scale;
1031
// If an int8 calibration table was supplied, look up per-layer scales:
// "<layer>_param_0" keys the weight scales, the bare layer name keys the
// bottom-blob scale. int8 output is enabled only when both are present.
1032 if (int8scale_table_path)
1033 {
// NOTE(review): fixed 256-byte buffer; a very long layer name would
// overflow this sprintf — consider snprintf if this code is revisited.
1034 char key[256];
1035 sprintf(key, "%s_param_0", layer.name().c_str());
1036 if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
1037 {
1038 weight_int8scale = weight_int8scale_table[std::string(key)];
1039 }
1040
1041 if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
1042 {
1043 blob_int8scale = blob_int8scale_table[layer.name()];
1044 }
1045
1046 int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();
1047
1048 if (int8_scale_term)
1049 {
// param entry 8=1 signals int8 scale data follows the weights in the bin
1050 fprintf(pp, " 8=1");
1051 }
1052 }
1053
1054 for (int j = 0; j < binlayer.blobs_size(); j++)
1055 {
1056 int quantize_tag = 0;
1057 const caffe::BlobProto& blob = binlayer.blobs(j);
1058
1059 std::vector<float> quantize_table;
1060 std::vector<unsigned char> quantize_index;
1061
1062 std::vector<unsigned short> float16_weights;
1063 std::vector<signed char> int8_weights;
1064
1065 // we will not quantize the bias values
1066 if (j == 0)
1067 {
1068 if (int8_scale_term)
1069 {
1070 if (quantize_level == 0)
1071 {
// magic tag: presumably marks "raw float data + int8 scales appended"
// for runtime-side quantization — TODO confirm against ncnn loader
1072 quantize_tag = 0x0002C056;
1073 }
1074 else if (quantize_level == 256)
1075 {
// quantize weights to int8 directly using the calibration scale
1076 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
1077 }
1078 }
1079 else if (quantize_level == 256)
1080 {
// 256-level table quantization: table of floats + 1-byte indices
1081 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
1082 }
1083 else if (quantize_level == 65536)
1084 {
// fp16 quantization
1085 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
1086 }
1087
1088 // write quantize tag first
1089 fwrite(&quantize_tag, sizeof(int), 1, bp);
1090
1091 if (quantize_tag)
1092 {
// remember position so the quantized payload can be padded to 4 bytes
1093 int p0 = ftell(bp);
1094 if (int8_scale_term)
1095 {
1096 if (quantize_level == 0)
1097 {
1098 // write original data and int8scale
1099 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
1100 }
1101 else if (quantize_level == 256)
1102 {
1103 fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
1104 }
1105 }
1106 else if (quantize_level == 256)
1107 {
1108 // write quantize table and index
1109 fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
1110 fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
1111 }
1112 else if (quantize_level == 65536)
1113 {
1114 fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
1115 }
1116
1117 // padding to 32bit align
1118 int nwrite = ftell(bp) - p0;
1119 int nalign = int(alignSize(nwrite, 4));
1120 unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
1121 fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
1122 }
1123 else
1124 {
1125 // write original data
1126 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
1127 }
1128 }
1129 else
1130 {
// bias blob (j != 0): always stored as raw float, no quantize tag
1131 // write original data
1132 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
1133 }
1134 }
1135
1136 if (int8_scale_term)
1137 {
// write int8_scale data
// appended after all blobs: weight scales first, then the blob scale
1138 fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
1139 fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
1140 }
1141 }
1143 else if (layer.type() == "Input")
1144 {
// Map the caffe blob shape (N,C,H,W order) onto ncnn Input params
// 0=w 1=h 2=c; -233 is the converter's "unset" sentinel value.
1145 const caffe::InputParameter& input_param = layer.input_param();
1146 const caffe::BlobShape& bs = input_param.shape(0);
1147 if (bs.dim_size() == 4)
1148 {
1149 fprintf(pp, " 0=%zd", size_t(bs.dim(3)));
1150 fprintf(pp, " 1=%zd", size_t(bs.dim(2)));
1151 fprintf(pp, " 2=%zd", size_t(bs.dim(1)));
1152 }
1153 else if (bs.dim_size() == 3)
1154 {
1155 fprintf(pp, " 0=%zd", size_t(bs.dim(2)));
1156 fprintf(pp, " 1=%zd", size_t(bs.dim(1)));
1157 fprintf(pp, " 2=-233");
1158 }
1159 else if (bs.dim_size() == 2)
1160 {
1161 fprintf(pp, " 0=%zd", size_t(bs.dim(1)));
1162 fprintf(pp, " 1=-233");
1163 fprintf(pp, " 2=-233");
1164 }
1165 }
1166 else if (layer.type() == "Interp")
1167 {
1168 const caffe::InterpParameter& interp_param = layer.interp_param();
// 0=resize_type hardcoded to 2 (presumably bilinear — confirm against
// ncnn Interp layer docs), 1/2=height/width zoom, 3/4=output h/w
1169 fprintf(pp, " 0=%d", 2);
1170 fprintf(pp, " 1=%e", (float)interp_param.zoom_factor());
1171 fprintf(pp, " 2=%e", (float)interp_param.zoom_factor());
1172 fprintf(pp, " 3=%d", interp_param.height());
1173 fprintf(pp, " 4=%d", interp_param.width());
1174 }
1175 else if (layer.type() == "LRN")
1176 {
// 0=region type 1=local_size 2=alpha 3=beta
1177 const caffe::LRNParameter& lrn_param = layer.lrn_param();
1178 fprintf(pp, " 0=%d", lrn_param.norm_region());
1179 fprintf(pp, " 1=%d", lrn_param.local_size());
1180 fprintf(pp, " 2=%e", lrn_param.alpha());
1181 fprintf(pp, " 3=%e", lrn_param.beta());
1182 }
1183 else if (layer.type() == "LSTM")
1184 {
// binlayer carries the trained recurrent weight blobs from the caffemodel
1185 const caffe::LayerParameter& binlayer = net.layer(netidx);
1186
1187 const caffe::BlobProto& weight_blob = binlayer.blobs(0);
1188 const caffe::RecurrentParameter& recurrent_param = layer.recurrent_param();
// 0=num_output 1=weight element count
1189 fprintf(pp, " 0=%d", recurrent_param.num_output());
1190 fprintf(pp, " 1=%d", weight_blob.data_size());
1191
// Unlike InnerProduct, every blob here (not just blob 0) may be quantized.
1192 for (int j = 0; j < binlayer.blobs_size(); j++)
1193 {
1194 int quantize_tag = 0;
1195 const caffe::BlobProto& blob = binlayer.blobs(j);
1196
1197 std::vector<float> quantize_table;
1198 std::vector<unsigned char> quantize_index;
1199
1200 std::vector<unsigned short> float16_weights;
1201
1202 if (quantize_level != 0)
1203 {
1204 if (quantize_level == 256)
1205 {
// 256-level table quantization: float table + 1-byte indices
1206 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
1207 }
1208 else if (quantize_level == 65536)
1209 {
// fp16 quantization
1210 quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
1211 }
1212 }
1213
1214 // write quantize tag first
1215 fwrite(&quantize_tag, sizeof(int), 1, bp);
1216
1217 if (quantize_tag)
1218 {
// remember position so the quantized payload can be padded to 4 bytes
1219 int p0 = ftell(bp);
1220 if (quantize_level == 256)
1221 {
1222 // write quantize table and index
1223 fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
1224 fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
1225 }
1226 else if (quantize_level == 65536)
1227 {
1228 fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
1229 }
1230 // padding to 32bit align
1231 int nwrite = ftell(bp) - p0;
1232 int nalign = int(alignSize(nwrite, 4));
1233 unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
1234 fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
1235 }
1236 else
1237 {
1238 // write original data
1239 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
1240 }
1241 }
1242 }
1243 else if (layer.type() == "MemoryData")
1244 {
// 0=w 1=h 2=c — fixed-size input placeholder
1245 const caffe::MemoryDataParameter& memory_data_param = layer.memory_data_param();
1246 fprintf(pp, " 0=%d", memory_data_param.width());
1247 fprintf(pp, " 1=%d", memory_data_param.height());
1248 fprintf(pp, " 2=%d", memory_data_param.channels());
1249 }
1250 else if (layer.type() == "MVN")
1251 {
// mean-variance normalization: 0=normalize_variance 1=across_channels 2=eps
1252 const caffe::MVNParameter& mvn_param = layer.mvn_param();
1253 fprintf(pp, " 0=%d", mvn_param.normalize_variance());
1254 fprintf(pp, " 1=%d", mvn_param.across_channels());
1255 fprintf(pp, " 2=%e", mvn_param.eps());
1256 }
1257 else if (layer.type() == "Normalize")
1258 {
// L2 normalize (SSD-style): params from norm_param, learned per-channel
// scale blob written raw to the bin file
1259 const caffe::LayerParameter& binlayer = net.layer(netidx);
1260 const caffe::BlobProto& scale_blob = binlayer.blobs(0);
1261 const caffe::NormalizeParameter& norm_param = layer.norm_param();
1262 fprintf(pp, " 0=%d", norm_param.across_spatial());
1263 fprintf(pp, " 1=%d", norm_param.channel_shared());
1264 fprintf(pp, " 2=%e", norm_param.eps());
1265 fprintf(pp, " 3=%d", scale_blob.data_size());
1266
1267 fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
1268 }
1269 else if (layer.type() == "Permute")
1270 {
// Translate an explicit caffe permutation order into one of ncnn's
// enumerated order_type codes (0..5). Orders that move the batch axis N
// (order0 != 0) are unsupported and silently fall back to order_type 0.
1271 const caffe::PermuteParameter& permute_param = layer.permute_param();
1272 int order_size = permute_param.order_size();
1273 int order_type = 0;
1274 if (order_size == 0)
1275 order_type = 0;
1276 if (order_size == 1)
1277 {
1278 int order0 = permute_param.order(0);
1279 if (order0 == 0)
1280 order_type = 0;
1281 // permute with N not supported
1282 }
1283 if (order_size == 2)
1284 {
// 2-element order: remaining axes keep their relative order
1285 int order0 = permute_param.order(0);
1286 int order1 = permute_param.order(1);
1287 if (order0 == 0)
1288 {
1289 if (order1 == 1) // 0 1 2 3
1290 order_type = 0;
1291 else if (order1 == 2) // 0 2 1 3
1292 order_type = 2;
1293 else if (order1 == 3) // 0 3 1 2
1294 order_type = 4;
1295 }
1296 // permute with N not supported
1297 }
1298 if (order_size == 3 || order_size == 4)
1299 {
// 3 or 4 elements: the first three axes fully determine the 4th,
// so only order(0..2) need to be inspected
1300 int order0 = permute_param.order(0);
1301 int order1 = permute_param.order(1);
1302 int order2 = permute_param.order(2);
1303 if (order0 == 0)
1304 {
1305 if (order1 == 1)
1306 {
1307 if (order2 == 2) // 0 1 2 3
1308 order_type = 0;
1309 if (order2 == 3) // 0 1 3 2
1310 order_type = 1;
1311 }
1312 else if (order1 == 2)
1313 {
1314 if (order2 == 1) // 0 2 1 3
1315 order_type = 2;
1316 if (order2 == 3) // 0 2 3 1
1317 order_type = 3;
1318 }
1319 else if (order1 == 3)
1320 {
1321 if (order2 == 1) // 0 3 1 2
1322 order_type = 4;
1323 if (order2 == 2) // 0 3 2 1
1324 order_type = 5;
1325 }
1326 }
1327 // permute with N not supported
1328 }
1329 fprintf(pp, " 0=%d", order_type);
1330 }
1331 else if (layer.type() == "Pooling")
1332 {
// 0=pool type; kernel/stride/pad use the w/h pair when both are set
// (params 1/11, 2/12, 3/13), otherwise the single square value.
1333 const caffe::PoolingParameter& pooling_param = layer.pooling_param();
1334 fprintf(pp, " 0=%d", pooling_param.pool());
1335 if (pooling_param.has_kernel_w() && pooling_param.has_kernel_h())
1336 {
1337 fprintf(pp, " 1=%d", pooling_param.kernel_w());
1338 fprintf(pp, " 11=%d", pooling_param.kernel_h());
1339 }
1340 else
1341 {
1342 fprintf(pp, " 1=%d", pooling_param.kernel_size());
1343 }
1344 if (pooling_param.has_stride_w() && pooling_param.has_stride_h())
1345 {
1346 fprintf(pp, " 2=%d", pooling_param.stride_w());
1347 fprintf(pp, " 12=%d", pooling_param.stride_h());
1348 }
1349 else
1350 {
1351 fprintf(pp, " 2=%d", pooling_param.stride());
1352 }
1353 if (pooling_param.has_pad_w() && pooling_param.has_pad_h())
1354 {
1355 fprintf(pp, " 3=%d", pooling_param.pad_w());
1356 fprintf(pp, " 13=%d", pooling_param.pad_h());
1357 }
1358 else
1359 {
1360 fprintf(pp, " 3=%d", pooling_param.pad());
1361 }
// 4=global pooling flag (0 when unset)
1362 fprintf(pp, " 4=%d", pooling_param.has_global_pooling() ? pooling_param.global_pooling() : 0);
1363 }
1364 else if (layer.type() == "Power")
1365 {
// y = (shift + scale * x) ^ power : 0=power 1=scale 2=shift
1366 const caffe::PowerParameter& power_param = layer.power_param();
1367 fprintf(pp, " 0=%e", power_param.power());
1368 fprintf(pp, " 1=%e", power_param.scale());
1369 fprintf(pp, " 2=%e", power_param.shift());
1370 }
1371 else if (layer.type() == "PReLU")
1372 {
// 0=number of learned slopes; slope blob written raw to the bin file
1373 const caffe::LayerParameter& binlayer = net.layer(netidx);
1374 const caffe::BlobProto& slope_blob = binlayer.blobs(0);
1375 fprintf(pp, " 0=%d", slope_blob.data_size());
1376 fwrite(slope_blob.data().data(), sizeof(float), slope_blob.data_size(), bp);
1377 }
1378 else if (layer.type() == "PriorBox")
1379 {
1380 const caffe::PriorBoxParameter& prior_box_param = layer.prior_box_param();
1381
// Count aspect ratios, excluding the implicit 1.0 entries (they are
// handled separately by the runtime and skipped again when printed below).
1382 int num_aspect_ratio = prior_box_param.aspect_ratio_size();
1383 for (int j = 0; j < prior_box_param.aspect_ratio_size(); j++)
1384 {
1385 float ar = prior_box_param.aspect_ratio(j);
1386 if (fabs(ar - 1.) < 1e-6)
1387 {
1388 num_aspect_ratio--;
1389 }
1390 }
1391
// Variances: 4 explicit values, a single value broadcast to all four,
// or the 0.1 default.
1392 float variances[4] = {0.1f, 0.1f, 0.1f, 0.1f};
1393 if (prior_box_param.variance_size() == 4)
1394 {
1395 variances[0] = prior_box_param.variance(0);
1396 variances[1] = prior_box_param.variance(1);
1397 variances[2] = prior_box_param.variance(2);
1398 variances[3] = prior_box_param.variance(3);
1399 }
1400 else if (prior_box_param.variance_size() == 1)
1401 {
1402 variances[0] = prior_box_param.variance(0);
1403 variances[1] = prior_box_param.variance(0);
1404 variances[2] = prior_box_param.variance(0);
1405 variances[3] = prior_box_param.variance(0);
1406 }
1407
// flip defaults to 1, clip to 0 when not present in the prototxt
1408 int flip = prior_box_param.has_flip() ? prior_box_param.flip() : 1;
1409 int clip = prior_box_param.has_clip() ? prior_box_param.clip() : 0;
// -233 sentinel = "derive from the input blob at runtime"
1410 int image_width = -233;
1411 int image_height = -233;
1412 if (prior_box_param.has_img_size())
1413 {
1414 image_width = prior_box_param.img_size();
1415 image_height = prior_box_param.img_size();
1416 }
1417 else if (prior_box_param.has_img_w() && prior_box_param.has_img_h())
1418 {
1419 image_width = prior_box_param.img_w();
1420 image_height = prior_box_param.img_h();
1421 }
1422
1423 float step_width = -233;
1424 float step_height = -233;
1425 if (prior_box_param.has_step())
1426 {
1427 step_width = prior_box_param.step();
1428 step_height = prior_box_param.step();
1429 }
1430 else if (prior_box_param.has_step_w() && prior_box_param.has_step_h())
1431 {
1432 step_width = prior_box_param.step_w();
1433 step_height = prior_box_param.step_h();
1434 }
1435
// -23300/-23301/-23302 are ncnn array-parameter ids: count, then the
// comma-separated values
1436 fprintf(pp, " -23300=%d", prior_box_param.min_size_size());
1437 for (int j = 0; j < prior_box_param.min_size_size(); j++)
1438 {
1439 fprintf(pp, ",%e", prior_box_param.min_size(j));
1440 }
1441 fprintf(pp, " -23301=%d", prior_box_param.max_size_size());
1442 for (int j = 0; j < prior_box_param.max_size_size(); j++)
1443 {
1444 fprintf(pp, ",%e", prior_box_param.max_size(j));
1445 }
1446 fprintf(pp, " -23302=%d", num_aspect_ratio);
1447 for (int j = 0; j < prior_box_param.aspect_ratio_size(); j++)
1448 {
1449 float ar = prior_box_param.aspect_ratio(j);
// skip the 1.0 ratios excluded from num_aspect_ratio above
1450 if (fabs(ar - 1.) < 1e-6)
1451 {
1452 continue;
1453 }
1454 fprintf(pp, ",%e", ar);
1455 }
1456 fprintf(pp, " 3=%e", variances[0]);
1457 fprintf(pp, " 4=%e", variances[1]);
1458 fprintf(pp, " 5=%e", variances[2]);
1459 fprintf(pp, " 6=%e", variances[3]);
1460 fprintf(pp, " 7=%d", flip);
1461 fprintf(pp, " 8=%d", clip);
1462 fprintf(pp, " 9=%d", image_width);
1463 fprintf(pp, " 10=%d", image_height);
1464 fprintf(pp, " 11=%e", step_width);
1465 fprintf(pp, " 12=%e", step_height);
1466 fprintf(pp, " 13=%e", prior_box_param.offset());
1467 }
1468 else if (layer.type() == "PSROIPooling")
1469 {
// 0/1=group_size (square grid) 2=spatial_scale 3=output_dim
1470 const caffe::PSROIPoolingParameter& psroi_pooling_param = layer.psroi_pooling_param();
1471 fprintf(pp, " 0=%d", psroi_pooling_param.group_size());
1472 fprintf(pp, " 1=%d", psroi_pooling_param.group_size());
1473 fprintf(pp, " 2=%e", psroi_pooling_param.spatial_scale());
1474 fprintf(pp, " 3=%d", psroi_pooling_param.output_dim());
1475 }
1476 else if (layer.type() == "Python")
1477 {
// Only Faster-RCNN-style ProposalLayer python layers are recognized;
// other python layers get no params.
1478 const caffe::PythonParameter& python_param = layer.python_param();
1479 std::string python_layer_name = python_param.layer();
1480 if (python_layer_name == "ProposalLayer")
1481 {
// feat_stride is parsed from param_str if it matches the expected
// format; the remaining proposal params use fixed defaults
1482 int feat_stride = 16;
1483 sscanf(python_param.param_str().c_str(), "'feat_stride': %d", &feat_stride);
1484
1485 int base_size = 16;
1486 // float ratio;
1487 // float scale;
1488 int pre_nms_topN = 6000;
1489 int after_nms_topN = 300;
1490 float nms_thresh = 0.7f;
1491 int min_size = 16;
1492 fprintf(pp, " 0=%d", feat_stride);
1493 fprintf(pp, " 1=%d", base_size);
1494 fprintf(pp, " 2=%d", pre_nms_topN);
1495 fprintf(pp, " 3=%d", after_nms_topN);
1496 fprintf(pp, " 4=%e", nms_thresh);
1497 fprintf(pp, " 5=%d", min_size);
1498 }
1499 }
1500 else if (layer.type() == "ReLU")
1501 {
// 0=negative slope (leaky ReLU); omitted for plain ReLU
1502 const caffe::ReLUParameter& relu_param = layer.relu_param();
1503 if (relu_param.has_negative_slope())
1504 {
1505 fprintf(pp, " 0=%e", relu_param.negative_slope());
1506 }
1507 }
1508 else if (layer.type() == "ReLU6")
1509 {
// emitted as a clip with the fixed [0, 6] range
1510 float min = 0.f;
1511 float max = 6.f;
1512 fprintf(pp, " 0=%e", min);
1513 fprintf(pp, " 1=%e", max);
1514 }
1515 else if (layer.type() == "Reorg")
1516 {
1517 const caffe::ReorgParameter& reorg_param = layer.reorg_param();
1518 fprintf(pp, " 0=%d", reorg_param.stride());
1519 }
1520 else if (layer.type() == "Reshape")
1521 {
// Map the target caffe shape onto ncnn's 0=w 1=h 2=c params, using the
// -233 sentinel for dimensions that are not specified. The batch axis
// (dim 0 of a 2+-dim shape) is dropped.
1522 const caffe::ReshapeParameter& reshape_param = layer.reshape_param();
1523 const caffe::BlobShape& bs = reshape_param.shape();
1524 if (bs.dim_size() == 1)
1525 {
1526 fprintf(pp, " 0=%zd 1=-233 2=-233", size_t(bs.dim(0)));
1527 }
1528 else if (bs.dim_size() == 2)
1529 {
1530 fprintf(pp, " 0=%zd 1=-233 2=-233", size_t(bs.dim(1)));
1531 }
1532 else if (bs.dim_size() == 3)
1533 {
1534 fprintf(pp, " 0=%zd 1=%zd 2=-233", size_t(bs.dim(2)), size_t(bs.dim(1)));
1535 }
1536 else // bs.dim_size() == 4
1537 {
1538 fprintf(pp, " 0=%zd 1=%zd 2=%zd", size_t(bs.dim(3)), size_t(bs.dim(2)), size_t(bs.dim(1)));
1539 }
1540 fprintf(pp, " 3=0"); // permute
1541 }
1542 else if (layer.type() == "ROIAlign")
1543 {
// 0/1=pooled w/h 2=spatial_scale; 3-5 fixed defaults (sampling ratio /
// aligned flag / version — TODO confirm against ncnn ROIAlign docs)
1544 const caffe::ROIAlignParameter& roi_align_param = layer.roi_align_param();
1545 fprintf(pp, " 0=%d", roi_align_param.pooled_w());
1546 fprintf(pp, " 1=%d", roi_align_param.pooled_h());
1547 fprintf(pp, " 2=%e", roi_align_param.spatial_scale());
1548 fprintf(pp, " 3=%d", 0);
1549 fprintf(pp, " 4=%d", false);
1550 fprintf(pp, " 5=%d", 0);
1551 }
1552 else if (layer.type() == "ROIPooling")
1553 {
1554 const caffe::ROIPoolingParameter& roi_pooling_param = layer.roi_pooling_param();
1555 fprintf(pp, " 0=%d", roi_pooling_param.pooled_w());
1556 fprintf(pp, " 1=%d", roi_pooling_param.pooled_h());
1557 fprintf(pp, " 2=%e", roi_pooling_param.spatial_scale());
1558 }
1559 else if (layer.type() == "Scale")
1560 {
1561 const caffe::LayerParameter& binlayer = net.layer(netidx);
1562
1563 const caffe::ScaleParameter& scale_param = layer.scale_param();
// A learned scale blob exists only when the caffemodel carries one blob
// beyond the optional bias; otherwise the scale comes from a second
// bottom blob and 0=-233 signals "no stored weights".
1564 bool scale_weight = scale_param.bias_term() ? (binlayer.blobs_size() == 2) : (binlayer.blobs_size() == 1);
1565 if (scale_weight)
1566 {
1567 const caffe::BlobProto& weight_blob = binlayer.blobs(0);
1568 fprintf(pp, " 0=%d", int(weight_blob.data_size()));
1569 }
1570 else
1571 {
1572 fprintf(pp, " 0=-233");
1573 }
1574
1575 fprintf(pp, " 1=%d", scale_param.bias_term());
1576
// scale (and bias, if present) blobs written raw, in order
1577 for (int j = 0; j < binlayer.blobs_size(); j++)
1578 {
1579 const caffe::BlobProto& blob = binlayer.blobs(j);
1580 fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
1581 }
1582 }
1583 else if (layer.type() == "ShuffleChannel")
1584 {
1585 const caffe::ShuffleChannelParameter& shuffle_channel_param = layer.shuffle_channel_param();
1586 fprintf(pp, " 0=%d", shuffle_channel_param.group());
1587 }
1588 else if (layer.type() == "Slice")
1589 {
1590 const caffe::SliceParameter& slice_param = layer.slice_param();
1591 if (slice_param.slice_point_size() == 0)
1592 {
// no slice points: split evenly across all tops (-233 = "auto")
1593 int num_slice = layer.top_size();
1594 fprintf(pp, " -23300=%d", num_slice);
1595 for (int j = 0; j < num_slice; j++)
1596 {
1597 fprintf(pp, ",-233");
1598 }
1599 }
1600 else
1601 {
// explicit slice points: convert absolute offsets into per-slice
// sizes; the final slice takes the remainder (-233)
1602 int num_slice = slice_param.slice_point_size() + 1;
1603 fprintf(pp, " -23300=%d", num_slice);
1604 int prev_offset = 0;
1605 for (int j = 0; j < slice_param.slice_point_size(); j++)
1606 {
1607 int offset = slice_param.slice_point(j);
1608 fprintf(pp, ",%d", offset - prev_offset);
1609 prev_offset = offset;
1610 }
1611 fprintf(pp, ",-233");
1612 }
// axis is shifted by -1 because ncnn blobs drop caffe's batch dimension
1613 int axis = 0;
1614 if (slice_param.has_axis())
1615 {
1616 axis = slice_param.axis() - 1;
1617 }
1618 else if (slice_param.has_slice_dim())
1619 {
1620 axis = slice_param.slice_dim() - 1;
1621 }
1622 fprintf(pp, " 1=%d", axis);
1623 }
1624 else if (layer.type() == "Softmax")
1625 {
// axis shifted by -1 (caffe's batch dim is dropped in ncnn); 1=1 is a
// fixed flag — presumably "fix bug 0" / legacy-behavior switch, confirm
// against ncnn Softmax docs
1626 const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
1627 int dim = softmax_param.axis() - 1;
1628 fprintf(pp, " 0=%d", dim);
1629 fprintf(pp, " 1=1");
1630 }
1631 else if (layer.type() == "Threshold")
1632 {
1633 const caffe::ThresholdParameter& threshold_param = layer.threshold_param();
1634 fprintf(pp, " 0=%e", threshold_param.threshold());
1635 }
1636 else if (layer.type() == "YoloDetectionOutput")
1637 {
// 0=classes 1=boxes per cell 2=confidence threshold 3=nms threshold,
// then the anchor biases as array param -23304
1638 const caffe::YoloDetectionOutputParameter& yolo_detection_output_param = layer.yolo_detection_output_param();
1639
1640 fprintf(pp, " 0=%d", yolo_detection_output_param.num_classes());
1641 fprintf(pp, " 1=%d", yolo_detection_output_param.num_box());
1642 fprintf(pp, " 2=%e", yolo_detection_output_param.confidence_threshold());
1643 fprintf(pp, " 3=%e", yolo_detection_output_param.nms_threshold());
1644
1645 int num_bias = yolo_detection_output_param.biases_size();
1646 fprintf(pp, " -23304=%d", num_bias);
1647 for (int j = 0; j < num_bias; j++)
1648 {
1649 fprintf(pp, ",%e", yolo_detection_output_param.biases(j));
1650 }
1651 }
1652 else if (layer.type() == "Yolov3DetectionOutput")
1653 {
// v3 adds per-scale masks (-23305), anchor scales (-23306) and the
// mask group count (param 7)
1654 const caffe::Yolov3DetectionOutputParameter& yolov3_detection_output_param = layer.yolov3_detection_output_param();
1655
1656 fprintf(pp, " 0=%d", yolov3_detection_output_param.num_classes());
1657 fprintf(pp, " 1=%d", yolov3_detection_output_param.num_box());
1658 fprintf(pp, " 2=%e", yolov3_detection_output_param.confidence_threshold());
1659 fprintf(pp, " 3=%e", yolov3_detection_output_param.nms_threshold());
1660
1661 int num_bias = yolov3_detection_output_param.biases_size();
1662 fprintf(pp, " -23304=%d", num_bias);
1663 for (int j = 0; j < num_bias; j++)
1664 {
1665 fprintf(pp, ",%e", yolov3_detection_output_param.biases(j));
1666 }
1667 int num_mask = yolov3_detection_output_param.mask_size();
1668 fprintf(pp, " -23305=%d", num_mask);
1669 for (int j = 0; j < num_mask; j++)
1670 {
// integer masks are emitted as floats to fit ncnn's array param format
1671 fprintf(pp, ",%e", (float)yolov3_detection_output_param.mask(j));
1672 }
1673 int num_anchors = yolov3_detection_output_param.anchors_scale_size();
1674 fprintf(pp, " -23306=%d", num_anchors);
1675 for (int j = 0; j < num_anchors; j++)
1676 {
1677 fprintf(pp, ",%e", (float)yolov3_detection_output_param.anchors_scale(j));
1678 }
1679 fprintf(pp, " 7=%d", yolov3_detection_output_param.mask_group_num());
1680 }
// terminate this layer's param line
1681 fprintf(pp, "\n");
1682
1683 // add split layer if top reference larger than one
// A blob consumed by multiple later layers must be duplicated through an
// ncnn Split layer, producing "<blob>_splitncnn_<k>" outputs. In-place
// layers (bottom == top) look up the decorated blob name instead.
1684 if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
1685 {
1686 std::string blob_name = blob_name_decorated[layer.top(0)];
1687 if (bottom_reference.find(blob_name) != bottom_reference.end())
1688 {
1689 int refcount = bottom_reference[blob_name];
1690 if (refcount > 1)
1691 {
1692 char splitname[256];
1693 sprintf(splitname, "splitncnn_%d", internal_split);
1694 fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
1695 fprintf(pp, " %s", blob_name.c_str());
1696
1697 for (int j = 0; j < refcount; j++)
1698 {
1699 fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
1700 }
1701 fprintf(pp, "\n");
1702
1703 internal_split++;
1704 }
1705 }
1706 }
1707 else
1708 {
// non-in-place layer: check every top blob for multiple consumers
1709 for (int j = 0; j < layer.top_size(); j++)
1710 {
1711 std::string blob_name = layer.top(j);
1712 if (bottom_reference.find(blob_name) != bottom_reference.end())
1713 {
1714 int refcount = bottom_reference[blob_name];
1715 if (refcount > 1)
1716 {
1717 char splitname[256];
1718 sprintf(splitname, "splitncnn_%d", internal_split);
1719 fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
1720 fprintf(pp, " %s", blob_name.c_str());
1721
// NOTE(review): this inner loop variable shadows the outer top-index j;
// harmless here (outer j is not used below), but easy to misread.
1722 for (int j = 0; j < refcount; j++)
1723 {
1724 fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
1725 }
1726 fprintf(pp, "\n");
1727
1728 internal_split++;
1729 }
1730 }
1731 }
1732 }
1733 }
1734
// flush and close the param (text) and bin (weights) output files
1735 fclose(pp);
1736 fclose(bp);
1737
1738 return 0;
1739 }
1740