// Tencent is pleased to support the open source community by making ncnn available.
//
// Copyright (C) 2017 THL A29 Limited, a Tencent company. All rights reserved.
//
// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// https://opensource.org/licenses/BSD-3-Clause
//
// Unless required by applicable law or agreed to in writing, software distributed
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
// specific language governing permissions and limitations under the License.

#ifdef _MSC_VER
#define _CRT_SECURE_NO_DEPRECATE
#endif

#include "caffe.pb.h"

#include <algorithm>
#include <assert.h>
#include <fstream>
#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/message.h>
#include <google/protobuf/text_format.h>
#include <limits.h>
#include <limits>
#include <map>
#include <math.h>
#include <set>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static inline size_t alignSize(size_t sz, int n)
{
    return (sz + n - 1) & -n;
}
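// e.g. alignSize(13, 4) == 16 and alignSize(16, 4) == 16; the mask trick
// assumes n is a power of two, which holds for the 4-byte alignment used below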

// convert float to half precision floating point
static unsigned short float2half(float value)
{
    // 1 : 8 : 23
    union
    {
        unsigned int u;
        float f;
    } tmp;

    tmp.f = value;

    // 1 : 8 : 23
    unsigned short sign = (tmp.u & 0x80000000) >> 31;
    unsigned short exponent = (tmp.u & 0x7F800000) >> 23;
    unsigned int significand = tmp.u & 0x7FFFFF;

    //     fprintf(stderr, "%d %d %d\n", sign, exponent, significand);

    // 1 : 5 : 10
    unsigned short fp16;
    if (exponent == 0)
    {
        // zero or denormal, always underflow
        fp16 = (sign << 15) | (0x00 << 10) | 0x00;
    }
    else if (exponent == 0xFF)
    {
        // infinity or NaN
        fp16 = (sign << 15) | (0x1F << 10) | (significand ? 0x200 : 0x00);
    }
    else
    {
        // normalized
        short newexp = exponent + (-127 + 15);
        if (newexp >= 31)
        {
            // overflow, return infinity
            fp16 = (sign << 15) | (0x1F << 10) | 0x00;
        }
        else if (newexp <= 0)
        {
            // underflow
            if (newexp >= -10)
            {
                // denormal half-precision
                unsigned short sig = (significand | 0x800000) >> (14 - newexp);
                fp16 = (sign << 15) | (0x00 << 10) | sig;
            }
            else
            {
                // underflow
                fp16 = (sign << 15) | (0x00 << 10) | 0x00;
            }
        }
        else
        {
            fp16 = (sign << 15) | (newexp << 10) | (significand >> 13);
        }
    }

    return fp16;
}
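// a quick sanity check of the mapping, assuming IEEE 754 single precision:
//   1.0f  = 0x3F800000 -> sign 0, exponent 127 -> 15, significand 0     -> 0x3C00
//   -2.5f = 0xC0200000 -> sign 1, exponent 128 -> 16, top 10 bits 0x100 -> 0xC100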

// round to nearest, halves away from zero, saturating to [-127, 127]
static signed char float2int8(float value)
{
    float tmp;
    if (value >= 0.f)
        tmp = value + 0.5f;
    else
        tmp = value - 0.5f;

    if (tmp > 127)
        return 127;
    if (tmp < -127)
        return -127;

    return static_cast<signed char>(tmp);
}
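// e.g. float2int8(1.4f) == 1, float2int8(1.5f) == 2, float2int8(-1.5f) == -2,
// and float2int8(300.f) saturates to 127

// read_int8scale_table below parses a whitespace-separated calibration table:
// each record is a key followed by one or more float scales, e.g.
// (hypothetical layer names)
//   conv1_param_0 156.639
//   conv1 148.027
// keys containing "_param_" carry weight scales; all other keys carry blob scales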

static bool read_int8scale_table(const char* filepath, std::map<std::string, std::vector<float> >& blob_int8scale_table, std::map<std::string, std::vector<float> >& weight_int8scale_table)
{
    blob_int8scale_table.clear();
    weight_int8scale_table.clear();

    FILE* fp = fopen(filepath, "rb");
    if (!fp)
    {
        fprintf(stderr, "fopen %s failed\n", filepath);
        return false;
    }

    bool in_scale_vector = false;

    std::string keystr;
    std::vector<float> scales;

    while (!feof(fp))
    {
        char key[256];
        int nscan = fscanf(fp, "%255s", key);
        if (nscan != 1)
        {
            break;
        }

        if (in_scale_vector)
        {
            float scale = 1.f;
            int nscan = sscanf(key, "%f", &scale);
            if (nscan == 1)
            {
                scales.push_back(scale);
                continue;
            }
            else
            {
                // XYZ_param_N pattern
                if (strstr(keystr.c_str(), "_param_"))
                {
                    weight_int8scale_table[keystr] = scales;
                }
                else
                {
                    blob_int8scale_table[keystr] = scales;
                }

                keystr.clear();
                scales.clear();

                in_scale_vector = false;
            }
        }

        if (!in_scale_vector)
        {
            keystr = key;

            in_scale_vector = true;
        }
    }

    if (in_scale_vector)
    {
        // XYZ_param_N pattern
        if (strstr(keystr.c_str(), "_param_"))
        {
            weight_int8scale_table[keystr] = scales;
        }
        else
        {
            blob_int8scale_table[keystr] = scales;
        }
    }

    fclose(fp);

    return true;
}

static int quantize_weight(float* data, size_t data_length, std::vector<unsigned short>& float16_weights)
{
    float16_weights.resize(data_length);

    for (size_t i = 0; i < data_length; i++)
    {
        float f = data[i];

        unsigned short fp16 = float2half(f);

        float16_weights[i] = fp16;
    }

    // magic tag for half-precision floating point
    return 0x01306B47;
}
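// 0x01306B47 is the flag ncnn's model loader checks to know the following
// weight blob is stored as fp16 rather than raw float32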

static int quantize_weight(float* data, size_t data_length, std::vector<float> scales, std::vector<signed char>& int8_weights)
{
    int8_weights.resize(data_length);

    const int length_per_group = static_cast<int>(data_length / scales.size());

    for (size_t i = 0; i < data_length; i++)
    {
        float f = data[i];

        signed char int8 = float2int8(f * scales[i / length_per_group]);

        int8_weights[i] = int8;
    }

    // magic tag for int8
    return 0x000D4B38;
}
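// scales are applied per output group: e.g. 8 weights with scales = {100.f, 50.f}
// quantize the first 4 weights with 100.f and the last 4 with 50.f
// (length_per_group == 4)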

static bool quantize_weight(float* data, size_t data_length, int quantize_level, std::vector<float>& quantize_table, std::vector<unsigned char>& quantize_index)
{
    assert(quantize_level != 0);
    assert(data != NULL);
    assert(data_length > 0);

    if (data_length < static_cast<size_t>(quantize_level))
    {
        fprintf(stderr, "no need to quantize: data_length < quantize_level\n");
        return false;
    }

    quantize_table.reserve(quantize_level);
    quantize_index.reserve(data_length);

    // 1. Find min and max value
    float max_value = -std::numeric_limits<float>::max(); // note: min() would be the smallest positive float, not the most negative
    float min_value = std::numeric_limits<float>::max();

    for (size_t i = 0; i < data_length; ++i)
    {
        if (max_value < data[i]) max_value = data[i];
        if (min_value > data[i]) min_value = data[i];
    }
    float strides = (max_value - min_value) / quantize_level;

    // 2. Generate quantize table
    for (int i = 0; i < quantize_level; ++i)
    {
        quantize_table.push_back(min_value + i * strides);
    }

    // 3. Align data to the quantized value
    for (size_t i = 0; i < data_length; ++i)
    {
        int table_index = int((data[i] - min_value) / strides);
        table_index = std::min(table_index, quantize_level - 1);

        float low_value = quantize_table[table_index];
        float high_value = low_value + strides;

        // find the nearer of the low and high value
        const float targetValue = data[i] - low_value < high_value - data[i] ? low_value : high_value;

        table_index = int((targetValue - min_value) / strides);
        table_index = std::min(table_index, quantize_level - 1);
        quantize_index.push_back(table_index);
    }

    return true;
}
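// e.g. weights spanning [-1, 1] with quantize_level == 256 give strides == 2.f/256,
// and the nearest-entry mapping keeps the reconstruction error at about strides / 2 or less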

static bool read_proto_from_text(const char* filepath, google::protobuf::Message* message)
{
    std::ifstream fs(filepath, std::ifstream::in);
    if (!fs.is_open())
    {
        fprintf(stderr, "open failed %s\n", filepath);
        return false;
    }

    google::protobuf::io::IstreamInputStream input(&fs);
    bool success = google::protobuf::TextFormat::Parse(&input, message);

    fs.close();

    return success;
}

static bool read_proto_from_binary(const char* filepath, google::protobuf::Message* message)
{
    std::ifstream fs(filepath, std::ifstream::in | std::ifstream::binary);
    if (!fs.is_open())
    {
        fprintf(stderr, "open failed %s\n", filepath);
        return false;
    }

    google::protobuf::io::IstreamInputStream input(&fs);
    google::protobuf::io::CodedInputStream codedstr(&input);

#if GOOGLE_PROTOBUF_VERSION >= 3011000
    codedstr.SetTotalBytesLimit(INT_MAX);
#else
    codedstr.SetTotalBytesLimit(INT_MAX, INT_MAX / 2);
#endif

    bool success = message->ParseFromCodedStream(&codedstr);

    fs.close();

    return success;
}
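// SetTotalBytesLimit above lifts protobuf's default message size cap (64 MB in
// older releases) so that large caffemodels can be parsed in one piece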

int main(int argc, char** argv)
{
    if (!(argc == 3 || argc == 5 || argc == 6 || argc == 7))
    {
        fprintf(stderr, "Usage: %s [caffeproto] [caffemodel] [ncnnproto] [ncnnbin] [quantizelevel] [int8scaletable]\n", argv[0]);
        return -1;
    }

    const char* caffeproto = argv[1];
    const char* caffemodel = argv[2];
    const char* ncnn_prototxt = argc >= 5 ? argv[3] : "ncnn.proto";
    const char* ncnn_modelbin = argc >= 5 ? argv[4] : "ncnn.bin";
    const char* quantize_param = argc >= 6 ? argv[5] : "0";
    const char* int8scale_table_path = argc == 7 ? argv[6] : NULL;
    int quantize_level = atoi(quantize_param);

    if (quantize_level != 0 && quantize_level != 256 && quantize_level != 65536)
    {
        fprintf(stderr, "%s: only supports quantize level 0, 256, or 65536\n", argv[0]);
        return -1;
    }
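    // typical invocations (file names are examples):
    //   caffe2ncnn deploy.prototxt net.caffemodel ncnn.proto ncnn.bin 65536        -> fp16 weights
    //   caffe2ncnn deploy.prototxt net.caffemodel ncnn.proto ncnn.bin 0 net.table  -> raw weights + int8 scale tables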

    caffe::NetParameter proto;
    caffe::NetParameter net;

    // load
    bool s0 = read_proto_from_text(caffeproto, &proto);
    if (!s0)
    {
        fprintf(stderr, "read_proto_from_text failed\n");
        return -1;
    }

    bool s1 = read_proto_from_binary(caffemodel, &net);
    if (!s1)
    {
        fprintf(stderr, "read_proto_from_binary failed\n");
        return -1;
    }

    std::map<std::string, std::vector<float> > blob_int8scale_table;
    std::map<std::string, std::vector<float> > weight_int8scale_table;
    if (int8scale_table_path)
    {
        bool s2 = read_int8scale_table(int8scale_table_path, blob_int8scale_table, weight_int8scale_table);
        if (!s2)
        {
            fprintf(stderr, "read_int8scale_table failed\n");
            return -1;
        }
    }

    FILE* pp = fopen(ncnn_prototxt, "wb");
    FILE* bp = fopen(ncnn_modelbin, "wb");

    // magic
    fprintf(pp, "7767517\n");
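    // (7767517 identifies the current ncnn param format revision)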

    // rename mapping for identical bottom top style
    std::map<std::string, std::string> blob_name_decorated;

    // bottom blob reference
    std::map<std::string, int> bottom_reference;

    // global definition line
    // [layer count] [blob count]
    int layer_count = proto.layer_size();
    std::set<std::string> blob_names;
    for (int i = 0; i < layer_count; i++)
    {
        const caffe::LayerParameter& layer = proto.layer(i);

        for (int j = 0; j < layer.bottom_size(); j++)
        {
            std::string blob_name = layer.bottom(j);
            if (blob_name_decorated.find(blob_name) != blob_name_decorated.end())
            {
                blob_name = blob_name_decorated[blob_name];
            }

            blob_names.insert(blob_name);

            if (bottom_reference.find(blob_name) == bottom_reference.end())
            {
                bottom_reference[blob_name] = 1;
            }
            else
            {
                bottom_reference[blob_name] = bottom_reference[blob_name] + 1;
            }
        }

        if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
        {
            std::string blob_name = layer.top(0) + "_" + layer.name();
            blob_name_decorated[layer.top(0)] = blob_name;
            blob_names.insert(blob_name);
        }
        else
        {
            for (int j = 0; j < layer.top_size(); j++)
            {
                std::string blob_name = layer.top(j);
                blob_names.insert(blob_name);
            }
        }
    }
    // remove bottom_reference entries with a reference count of one
    int splitncnn_blob_count = 0;
    std::map<std::string, int>::iterator it = bottom_reference.begin();
    while (it != bottom_reference.end())
    {
        if (it->second == 1)
        {
            bottom_reference.erase(it++);
        }
        else
        {
            splitncnn_blob_count += it->second;
            //             fprintf(stderr, "%s %d\n", it->first.c_str(), it->second);
            ++it;
        }
    }
    fprintf(pp, "%d %d\n", int(layer_count + bottom_reference.size()), int(blob_names.size() + splitncnn_blob_count));
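    // every blob consumed by more than one layer gets an inserted Split layer,
    // so the totals above add one layer per shared blob and one renamed blob
    // per consumer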

    // populate
    blob_name_decorated.clear();
    int internal_split = 0;
    for (int i = 0; i < layer_count; i++)
    {
        const caffe::LayerParameter& layer = proto.layer(i);

        // layer definition line, repeated
        // [type] [name] [bottom blob count] [top blob count] [bottom blobs] [top blobs] [layer specific params]
        if (layer.type() == "BN")
        {
            fprintf(pp, "%-16s", "Scale");
        }
        else if (layer.type() == "Convolution")
        {
            const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
            if (convolution_param.group() != 1)
                fprintf(pp, "%-16s", "ConvolutionDepthWise");
            else
                fprintf(pp, "%-16s", "Convolution");
        }
        else if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
        {
            fprintf(pp, "%-16s", "ConvolutionDepthWise");
        }
        else if (layer.type() == "Deconvolution")
        {
            const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
            if (convolution_param.group() != 1)
                fprintf(pp, "%-16s", "DeconvolutionDepthWise");
            else
                fprintf(pp, "%-16s", "Deconvolution");
        }
        else if (layer.type() == "MemoryData")
        {
            fprintf(pp, "%-16s", "Input");
        }
        else if (layer.type() == "Python")
        {
            const caffe::PythonParameter& python_param = layer.python_param();
            std::string python_layer_name = python_param.layer();
            if (python_layer_name == "ProposalLayer")
                fprintf(pp, "%-16s", "Proposal");
            else
                fprintf(pp, "%-16s", python_layer_name.c_str());
        }
        else if (layer.type() == "ReLU6")
        {
            fprintf(pp, "%-16s", "Clip");
        }
        else if (layer.type() == "Silence")
        {
            fprintf(pp, "%-16s", "Noop");
        }
        else
        {
            fprintf(pp, "%-16s", layer.type().c_str());
        }
        fprintf(pp, " %-16s %d %d", layer.name().c_str(), layer.bottom_size(), layer.top_size());

        for (int j = 0; j < layer.bottom_size(); j++)
        {
            std::string blob_name = layer.bottom(j);
            if (blob_name_decorated.find(layer.bottom(j)) != blob_name_decorated.end())
            {
                blob_name = blob_name_decorated[layer.bottom(j)];
            }

            if (bottom_reference.find(blob_name) != bottom_reference.end())
            {
                int refidx = bottom_reference[blob_name] - 1;
                bottom_reference[blob_name] = refidx;

                char splitsuffix[256];
                sprintf(splitsuffix, "_splitncnn_%d", refidx);
                blob_name = blob_name + splitsuffix;
            }

            fprintf(pp, " %s", blob_name.c_str());
        }

        // decorated
        if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
        {
            std::string blob_name = layer.top(0) + "_" + layer.name();
            blob_name_decorated[layer.top(0)] = blob_name;

            fprintf(pp, " %s", blob_name.c_str());
        }
        else
        {
            for (int j = 0; j < layer.top_size(); j++)
            {
                std::string blob_name = layer.top(j);
                fprintf(pp, " %s", blob_name.c_str());
            }
        }

        // find blob binary by layer name
        int netidx;
        for (netidx = 0; netidx < net.layer_size(); netidx++)
        {
            if (net.layer(netidx).name() == layer.name())
            {
                break;
            }
        }
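        // netidx now points at the caffemodel layer carrying this layer's
        // learned blobs; only the weighted layer types below dereference it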

        // layer specific params
        if (layer.type() == "BatchNorm")
        {
            const caffe::LayerParameter& binlayer = net.layer(netidx);

            const caffe::BlobProto& mean_blob = binlayer.blobs(0);
            const caffe::BlobProto& var_blob = binlayer.blobs(1);
            fprintf(pp, " 0=%d", (int)mean_blob.data_size());

            const caffe::BatchNormParameter& batch_norm_param = layer.batch_norm_param();
            float eps = batch_norm_param.eps();

            std::vector<float> ones(mean_blob.data_size(), 1.f);
            fwrite(ones.data(), sizeof(float), ones.size(), bp); // slope

            if (binlayer.blobs_size() < 3)
            {
                fwrite(mean_blob.data().data(), sizeof(float), mean_blob.data_size(), bp);
                float tmp;
                for (int j = 0; j < var_blob.data_size(); j++)
                {
                    tmp = var_blob.data().data()[j] + eps;
                    fwrite(&tmp, sizeof(float), 1, bp);
                }
            }
            else
            {
                float scale_factor = binlayer.blobs(2).data().data()[0] == 0 ? 0 : 1 / binlayer.blobs(2).data().data()[0];
                // premultiply scale_factor to mean and variance
                float tmp;
                for (int j = 0; j < mean_blob.data_size(); j++)
                {
                    tmp = mean_blob.data().data()[j] * scale_factor;
                    fwrite(&tmp, sizeof(float), 1, bp);
                }
                for (int j = 0; j < var_blob.data_size(); j++)
                {
                    tmp = var_blob.data().data()[j] * scale_factor + eps;
                    fwrite(&tmp, sizeof(float), 1, bp);
                }
            }

            std::vector<float> zeros(mean_blob.data_size(), 0.f);
            fwrite(zeros.data(), sizeof(float), zeros.size(), bp); // bias
        }
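        // (the BatchNorm branch above writes slope/mean/variance/bias in the
        // order ncnn's BatchNorm layer loads them, synthesizing unit slopes and
        // zero biases since caffe keeps scale/shift in a separate Scale layer)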
        else if (layer.type() == "BN")
        {
            const caffe::LayerParameter& binlayer = net.layer(netidx);

            const caffe::BlobProto& scale_blob = binlayer.blobs(0);
            const caffe::BlobProto& shift_blob = binlayer.blobs(1);
            fprintf(pp, " 0=%d", (int)scale_blob.data_size());
            fprintf(pp, " 1=1");

            fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
            fwrite(shift_blob.data().data(), sizeof(float), shift_blob.data_size(), bp);
        }
        else if (layer.type() == "Concat")
        {
            const caffe::ConcatParameter& concat_param = layer.concat_param();
            int axis = concat_param.axis() - 1;
            fprintf(pp, " 0=%d", axis);
        }
        else if (layer.type() == "Convolution" || layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
        {
            const caffe::LayerParameter& binlayer = net.layer(netidx);

            const caffe::BlobProto& weight_blob = binlayer.blobs(0);
            const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
            fprintf(pp, " 0=%d", convolution_param.num_output());
            if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
            {
                fprintf(pp, " 1=%d", convolution_param.kernel_w());
                fprintf(pp, " 11=%d", convolution_param.kernel_h());
            }
            else
            {
                fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
            }
            fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
            if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
            {
                fprintf(pp, " 3=%d", convolution_param.stride_w());
                fprintf(pp, " 13=%d", convolution_param.stride_h());
            }
            else
            {
                fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
            }
            if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
            {
                fprintf(pp, " 4=%d", convolution_param.pad_w());
                fprintf(pp, " 14=%d", convolution_param.pad_h());
            }
            else
            {
                fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
            }
            fprintf(pp, " 5=%d", convolution_param.bias_term());
            fprintf(pp, " 6=%d", weight_blob.data_size());

            int num_group = 1;
            if (layer.type() == "ConvolutionDepthwise" || layer.type() == "DepthwiseConvolution")
            {
                num_group = convolution_param.num_output();
            }
            else
            {
                num_group = convolution_param.group();
            }

            if (num_group != 1)
            {
                fprintf(pp, " 7=%d", num_group);
            }

            bool int8_scale_term = false;
            std::vector<float> weight_int8scale;
            std::vector<float> blob_int8scale;

            if (int8scale_table_path)
            {
                char key[256];
                sprintf(key, "%s_param_0", layer.name().c_str());
                if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
                {
                    weight_int8scale = weight_int8scale_table[std::string(key)];
                }

                if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
                {
                    blob_int8scale = blob_int8scale_table[layer.name()];
                }

                int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();

                if (int8_scale_term)
                {
                    if ((int)weight_int8scale.size() == num_group)
                    {
                        fprintf(pp, " 8=1");
                    }
                    else
                    {
                        fprintf(pp, " 8=2");
                    }
                }
            }

            for (int j = 0; j < binlayer.blobs_size(); j++)
            {
                int quantize_tag = 0;
                const caffe::BlobProto& blob = binlayer.blobs(j);

                std::vector<float> quantize_table;
                std::vector<unsigned char> quantize_index;

                std::vector<unsigned short> float16_weights;
                std::vector<signed char> int8_weights;

                // we will not quantize the bias values
                if (j == 0)
                {
                    if (int8_scale_term)
                    {
                        if (quantize_level == 0)
                        {
                            quantize_tag = 0x0002C056;
                        }
                        else if (quantize_level == 256)
                        {
                            quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
                        }
                    }
                    else if (quantize_level == 256)
                    {
                        quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
                    }
                    else if (quantize_level == 65536)
                    {
                        quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
                    }

                    // write quantize tag first
                    fwrite(&quantize_tag, sizeof(int), 1, bp);

                    if (quantize_tag)
                    {
                        int p0 = ftell(bp);
                        if (int8_scale_term)
                        {
                            if (quantize_level == 0)
                            {
                                // write original data and int8scale
                                fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
                            }
                            else if (quantize_level == 256)
                            {
                                fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
                            }
                        }
                        else if (quantize_level == 256)
                        {
                            // write quantize table and index
                            fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
                            fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
                        }
                        else if (quantize_level == 65536)
                        {
                            fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
                        }

                        // padding to 32bit align
                        int nwrite = ftell(bp) - p0;
                        int nalign = int(alignSize(nwrite, 4));
                        unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
                        fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
                    }
                    else
                    {
                        // write original data
                        fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
                    }
                }
                else
                {
                    // write original data
                    fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
                }
            }

            if (int8_scale_term)
            {
                // write int8_scale data
                fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
                fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
            }
        }
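        // (the quantized weight block above is padded to a 4-byte boundary so
        // the float data that follows stays 32-bit aligned in the .bin file)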
        else if (layer.type() == "Crop")
        {
            const caffe::CropParameter& crop_param = layer.crop_param();
            int num_offset = crop_param.offset_size();
            if (num_offset == 1)
            {
                int offset = crop_param.offset(0);
                int axis = crop_param.axis() - 1;
                if (axis == 0)
                {
                    fprintf(pp, " 0=%d", offset);
                    fprintf(pp, " 1=%d", offset);
                    fprintf(pp, " 2=%d", offset);
                }
                else if (axis == 1)
                {
                    fprintf(pp, " 0=%d", offset);
                    fprintf(pp, " 1=%d", offset);
                }
                else if (axis == 2)
                {
                    fprintf(pp, " 0=%d", offset);
                }
            }
            else if (num_offset == 2)
            {
                int woffset = crop_param.offset(1);
                int hoffset = crop_param.offset(0);
                fprintf(pp, " 0=%d", woffset);
                fprintf(pp, " 1=%d", hoffset);
            }
            else if (num_offset == 3)
            {
                int woffset = crop_param.offset(2);
                int hoffset = crop_param.offset(1);
                int coffset = crop_param.offset(0);
                fprintf(pp, " 0=%d", woffset);
                fprintf(pp, " 1=%d", hoffset);
                fprintf(pp, " 2=%d", coffset);
            }
        }
        else if (layer.type() == "Deconvolution")
        {
            const caffe::LayerParameter& binlayer = net.layer(netidx);

            const caffe::BlobProto& weight_blob = binlayer.blobs(0);
            const caffe::ConvolutionParameter& convolution_param = layer.convolution_param();
            fprintf(pp, " 0=%d", convolution_param.num_output());
            if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
            {
                fprintf(pp, " 1=%d", convolution_param.kernel_w());
                fprintf(pp, " 11=%d", convolution_param.kernel_h());
            }
            else
            {
                fprintf(pp, " 1=%d", convolution_param.kernel_size(0));
            }
            fprintf(pp, " 2=%d", convolution_param.dilation_size() != 0 ? convolution_param.dilation(0) : 1);
            if (convolution_param.has_stride_w() && convolution_param.has_stride_h())
            {
                fprintf(pp, " 3=%d", convolution_param.stride_w());
                fprintf(pp, " 13=%d", convolution_param.stride_h());
            }
            else
            {
                fprintf(pp, " 3=%d", convolution_param.stride_size() != 0 ? convolution_param.stride(0) : 1);
            }
            if (convolution_param.has_pad_w() && convolution_param.has_pad_h())
            {
                fprintf(pp, " 4=%d", convolution_param.pad_w());
                fprintf(pp, " 14=%d", convolution_param.pad_h());
            }
            else
            {
                fprintf(pp, " 4=%d", convolution_param.pad_size() != 0 ? convolution_param.pad(0) : 0);
            }
            fprintf(pp, " 5=%d", convolution_param.bias_term());
            fprintf(pp, " 6=%d", weight_blob.data_size());

            int group = convolution_param.group();
            if (group != 1)
            {
                fprintf(pp, " 7=%d", group);
            }

            int quantized_weight = 0;
            fwrite(&quantized_weight, sizeof(int), 1, bp);

            int maxk = 0;
            if (convolution_param.has_kernel_w() && convolution_param.has_kernel_h())
            {
                maxk = convolution_param.kernel_w() * convolution_param.kernel_h();
            }
            else
            {
                maxk = convolution_param.kernel_size(0) * convolution_param.kernel_size(0);
            }
            for (int g = 0; g < group; g++)
            {
                // reorder weight from inch-outch to outch-inch
                int num_output = convolution_param.num_output() / group;
                int num_input = weight_blob.data_size() / maxk / num_output / group;
                const float* weight_data_ptr = weight_blob.data().data() + g * maxk * num_output * num_input;
                for (int k = 0; k < num_output; k++)
                {
                    for (int j = 0; j < num_input; j++)
                    {
                        fwrite(weight_data_ptr + (j * num_output + k) * maxk, sizeof(float), maxk, bp);
                    }
                }
            }

            for (int j = 1; j < binlayer.blobs_size(); j++)
            {
                const caffe::BlobProto& blob = binlayer.blobs(j);
                fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
            }
        }
        else if (layer.type() == "DetectionOutput")
        {
            const caffe::DetectionOutputParameter& detection_output_param = layer.detection_output_param();
            const caffe::NonMaximumSuppressionParameter& nms_param = detection_output_param.nms_param();
            fprintf(pp, " 0=%d", detection_output_param.num_classes());
            fprintf(pp, " 1=%e", nms_param.nms_threshold());
            fprintf(pp, " 2=%d", nms_param.top_k());
            fprintf(pp, " 3=%d", detection_output_param.keep_top_k());
            fprintf(pp, " 4=%e", detection_output_param.confidence_threshold());
        }
        else if (layer.type() == "Dropout")
        {
            const caffe::DropoutParameter& dropout_param = layer.dropout_param();
            if (dropout_param.has_scale_train() && !dropout_param.scale_train())
            {
                float scale = 1.f - dropout_param.dropout_ratio();
                fprintf(pp, " 0=%e", scale);
            }
        }
        else if (layer.type() == "Eltwise")
        {
            const caffe::EltwiseParameter& eltwise_param = layer.eltwise_param();
            int coeff_size = eltwise_param.coeff_size();
            fprintf(pp, " 0=%d", (int)eltwise_param.operation());
            fprintf(pp, " -23301=%d", coeff_size);
            for (int j = 0; j < coeff_size; j++)
            {
                fprintf(pp, ",%e", eltwise_param.coeff(j));
            }
        }
        else if (layer.type() == "ELU")
        {
            const caffe::ELUParameter& elu_param = layer.elu_param();
            fprintf(pp, " 0=%e", elu_param.alpha());
        }
        else if (layer.type() == "Embed")
        {
            const caffe::LayerParameter& binlayer = net.layer(netidx);

            const caffe::BlobProto& weight_blob = binlayer.blobs(0);
            const caffe::EmbedParameter& embed_param = layer.embed_param();
            fprintf(pp, " 0=%d", embed_param.num_output());
            fprintf(pp, " 1=%d", embed_param.input_dim());
            fprintf(pp, " 2=%d", embed_param.bias_term());
            fprintf(pp, " 3=%d", weight_blob.data_size());

            for (int j = 0; j < binlayer.blobs_size(); j++)
            {
                int quantize_tag = 0;
                const caffe::BlobProto& blob = binlayer.blobs(j);

                std::vector<float> quantize_table;
                std::vector<unsigned char> quantize_index;

                std::vector<unsigned short> float16_weights;

                // we will not quantize the bias values
                if (j == 0 && quantize_level != 0)
                {
                    if (quantize_level == 256)
                    {
                        quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
                    }
                    else if (quantize_level == 65536)
                    {
                        quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
                    }
                }

                // write quantize tag first
                if (j == 0)
                    fwrite(&quantize_tag, sizeof(int), 1, bp);

                if (quantize_tag)
                {
                    int p0 = ftell(bp);
                    if (quantize_level == 256)
                    {
                        // write quantize table and index
                        fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
                        fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
                    }
                    else if (quantize_level == 65536)
                    {
                        fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
                    }
                    // padding to 32bit align
                    int nwrite = ftell(bp) - p0;
                    int nalign = int(alignSize(nwrite, 4));
                    unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
                    fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
                }
                else
                {
                    // write original data
                    fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
                }
            }
        }
        else if (layer.type() == "InnerProduct")
        {
            const caffe::LayerParameter& binlayer = net.layer(netidx);

            const caffe::BlobProto& weight_blob = binlayer.blobs(0);
            const caffe::InnerProductParameter& inner_product_param = layer.inner_product_param();
            fprintf(pp, " 0=%d", inner_product_param.num_output());
            fprintf(pp, " 1=%d", inner_product_param.bias_term());
            fprintf(pp, " 2=%d", weight_blob.data_size());

            bool int8_scale_term = false;
            std::vector<float> weight_int8scale;
            std::vector<float> blob_int8scale;

            if (int8scale_table_path)
            {
                char key[256];
                sprintf(key, "%s_param_0", layer.name().c_str());
                if (weight_int8scale_table.find(std::string(key)) != weight_int8scale_table.end())
                {
                    weight_int8scale = weight_int8scale_table[std::string(key)];
                }

                if (blob_int8scale_table.find(layer.name()) != blob_int8scale_table.end())
                {
                    blob_int8scale = blob_int8scale_table[layer.name()];
                }

                int8_scale_term = !weight_int8scale.empty() && !blob_int8scale.empty();

                if (int8_scale_term)
                {
                    fprintf(pp, " 8=1");
                }
            }

            for (int j = 0; j < binlayer.blobs_size(); j++)
            {
                int quantize_tag = 0;
                const caffe::BlobProto& blob = binlayer.blobs(j);

                std::vector<float> quantize_table;
                std::vector<unsigned char> quantize_index;

                std::vector<unsigned short> float16_weights;
                std::vector<signed char> int8_weights;

                // we will not quantize the bias values
                if (j == 0)
                {
                    if (int8_scale_term)
                    {
                        if (quantize_level == 0)
                        {
                            quantize_tag = 0x0002C056;
                        }
                        else if (quantize_level == 256)
                        {
                            quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), weight_int8scale, int8_weights);
                        }
                    }
                    else if (quantize_level == 256)
                    {
                        quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
                    }
                    else if (quantize_level == 65536)
                    {
                        quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
                    }

                    // write quantize tag first
                    fwrite(&quantize_tag, sizeof(int), 1, bp);

                    if (quantize_tag)
                    {
                        int p0 = ftell(bp);
                        if (int8_scale_term)
                        {
                            if (quantize_level == 0)
                            {
                                // write original data and int8scale
                                fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
                            }
                            else if (quantize_level == 256)
                            {
                                fwrite(int8_weights.data(), sizeof(signed char), int8_weights.size(), bp);
                            }
                        }
                        else if (quantize_level == 256)
                        {
                            // write quantize table and index
                            fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
                            fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
                        }
                        else if (quantize_level == 65536)
                        {
                            fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
                        }

                        // padding to 32bit align
                        int nwrite = ftell(bp) - p0;
                        int nalign = int(alignSize(nwrite, 4));
                        unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
                        fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
                    }
                    else
                    {
                        // write original data
                        fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
                    }
                }
                else
                {
                    // write original data
                    fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
                }
            }

            if (int8_scale_term)
            {
                // write int8_scale data
                fwrite(weight_int8scale.data(), sizeof(float), weight_int8scale.size(), bp);
                fwrite(blob_int8scale.data(), sizeof(float), blob_int8scale.size(), bp);
            }
        }
        else if (layer.type() == "Input")
        {
            const caffe::InputParameter& input_param = layer.input_param();
            const caffe::BlobShape& bs = input_param.shape(0);
            if (bs.dim_size() == 4)
            {
                fprintf(pp, " 0=%zd", size_t(bs.dim(3)));
                fprintf(pp, " 1=%zd", size_t(bs.dim(2)));
                fprintf(pp, " 2=%zd", size_t(bs.dim(1)));
            }
            else if (bs.dim_size() == 3)
            {
                fprintf(pp, " 0=%zd", size_t(bs.dim(2)));
                fprintf(pp, " 1=%zd", size_t(bs.dim(1)));
                fprintf(pp, " 2=-233");
            }
            else if (bs.dim_size() == 2)
            {
                fprintf(pp, " 0=%zd", size_t(bs.dim(1)));
                fprintf(pp, " 1=-233");
                fprintf(pp, " 2=-233");
            }
        }
        else if (layer.type() == "Interp")
        {
            const caffe::InterpParameter& interp_param = layer.interp_param();
            fprintf(pp, " 0=%d", 2);
            fprintf(pp, " 1=%e", (float)interp_param.zoom_factor());
            fprintf(pp, " 2=%e", (float)interp_param.zoom_factor());
            fprintf(pp, " 3=%d", interp_param.height());
            fprintf(pp, " 4=%d", interp_param.width());
        }
        else if (layer.type() == "LRN")
        {
            const caffe::LRNParameter& lrn_param = layer.lrn_param();
            fprintf(pp, " 0=%d", lrn_param.norm_region());
            fprintf(pp, " 1=%d", lrn_param.local_size());
            fprintf(pp, " 2=%e", lrn_param.alpha());
            fprintf(pp, " 3=%e", lrn_param.beta());
        }
        else if (layer.type() == "LSTM")
        {
            const caffe::LayerParameter& binlayer = net.layer(netidx);

            const caffe::BlobProto& weight_blob = binlayer.blobs(0);
            const caffe::RecurrentParameter& recurrent_param = layer.recurrent_param();
            fprintf(pp, " 0=%d", recurrent_param.num_output());
            fprintf(pp, " 1=%d", weight_blob.data_size());

            for (int j = 0; j < binlayer.blobs_size(); j++)
            {
                int quantize_tag = 0;
                const caffe::BlobProto& blob = binlayer.blobs(j);

                std::vector<float> quantize_table;
                std::vector<unsigned char> quantize_index;

                std::vector<unsigned short> float16_weights;

                if (quantize_level != 0)
                {
                    if (quantize_level == 256)
                    {
                        quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), quantize_level, quantize_table, quantize_index);
                    }
                    else if (quantize_level == 65536)
                    {
                        quantize_tag = quantize_weight((float*)blob.data().data(), blob.data_size(), float16_weights);
                    }
                }

                // write quantize tag first
                fwrite(&quantize_tag, sizeof(int), 1, bp);

                if (quantize_tag)
                {
                    int p0 = ftell(bp);
                    if (quantize_level == 256)
                    {
                        // write quantize table and index
                        fwrite(quantize_table.data(), sizeof(float), quantize_table.size(), bp);
                        fwrite(quantize_index.data(), sizeof(unsigned char), quantize_index.size(), bp);
                    }
                    else if (quantize_level == 65536)
                    {
                        fwrite(float16_weights.data(), sizeof(unsigned short), float16_weights.size(), bp);
                    }
                    // padding to 32bit align
                    int nwrite = ftell(bp) - p0;
                    int nalign = int(alignSize(nwrite, 4));
                    unsigned char padding[4] = {0x00, 0x00, 0x00, 0x00};
                    fwrite(padding, sizeof(unsigned char), nalign - nwrite, bp);
                }
                else
                {
                    // write original data
                    fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
                }
            }
        }
        else if (layer.type() == "MemoryData")
        {
            const caffe::MemoryDataParameter& memory_data_param = layer.memory_data_param();
            fprintf(pp, " 0=%d", memory_data_param.width());
            fprintf(pp, " 1=%d", memory_data_param.height());
            fprintf(pp, " 2=%d", memory_data_param.channels());
        }
        else if (layer.type() == "MVN")
        {
            const caffe::MVNParameter& mvn_param = layer.mvn_param();
            fprintf(pp, " 0=%d", mvn_param.normalize_variance());
            fprintf(pp, " 1=%d", mvn_param.across_channels());
            fprintf(pp, " 2=%e", mvn_param.eps());
        }
        else if (layer.type() == "Normalize")
        {
            const caffe::LayerParameter& binlayer = net.layer(netidx);
            const caffe::BlobProto& scale_blob = binlayer.blobs(0);
            const caffe::NormalizeParameter& norm_param = layer.norm_param();
            fprintf(pp, " 0=%d", norm_param.across_spatial());
            fprintf(pp, " 1=%d", norm_param.channel_shared());
            fprintf(pp, " 2=%e", norm_param.eps());
            fprintf(pp, " 3=%d", scale_blob.data_size());

            fwrite(scale_blob.data().data(), sizeof(float), scale_blob.data_size(), bp);
        }
        else if (layer.type() == "Permute")
        {
            const caffe::PermuteParameter& permute_param = layer.permute_param();
            int order_size = permute_param.order_size();
            int order_type = 0;
            if (order_size == 0)
                order_type = 0;
            if (order_size == 1)
            {
                int order0 = permute_param.order(0);
                if (order0 == 0)
                    order_type = 0;
                // permute with N not supported
            }
            if (order_size == 2)
            {
                int order0 = permute_param.order(0);
                int order1 = permute_param.order(1);
                if (order0 == 0)
                {
                    if (order1 == 1) // 0 1 2 3
                        order_type = 0;
                    else if (order1 == 2) // 0 2 1 3
                        order_type = 2;
                    else if (order1 == 3) // 0 3 1 2
                        order_type = 4;
                }
                // permute with N not supported
            }
            if (order_size == 3 || order_size == 4)
            {
                int order0 = permute_param.order(0);
                int order1 = permute_param.order(1);
                int order2 = permute_param.order(2);
                if (order0 == 0)
                {
                    if (order1 == 1)
                    {
                        if (order2 == 2) // 0 1 2 3
                            order_type = 0;
                        if (order2 == 3) // 0 1 3 2
                            order_type = 1;
                    }
                    else if (order1 == 2)
                    {
                        if (order2 == 1) // 0 2 1 3
                            order_type = 2;
                        if (order2 == 3) // 0 2 3 1
                            order_type = 3;
                    }
                    else if (order1 == 3)
                    {
                        if (order2 == 1) // 0 3 1 2
                            order_type = 4;
                        if (order2 == 2) // 0 3 2 1
                            order_type = 5;
                    }
                }
                // permute with N not supported
            }
            fprintf(pp, " 0=%d", order_type);
        }
1331         else if (layer.type() == "Pooling")
1332         {
1333             const caffe::PoolingParameter& pooling_param = layer.pooling_param();
1334             fprintf(pp, " 0=%d", pooling_param.pool());
1335             if (pooling_param.has_kernel_w() && pooling_param.has_kernel_h())
1336             {
1337                 fprintf(pp, " 1=%d", pooling_param.kernel_w());
1338                 fprintf(pp, " 11=%d", pooling_param.kernel_h());
1339             }
1340             else
1341             {
1342                 fprintf(pp, " 1=%d", pooling_param.kernel_size());
1343             }
1344             if (pooling_param.has_stride_w() && pooling_param.has_stride_h())
1345             {
1346                 fprintf(pp, " 2=%d", pooling_param.stride_w());
1347                 fprintf(pp, " 12=%d", pooling_param.stride_h());
1348             }
1349             else
1350             {
1351                 fprintf(pp, " 2=%d", pooling_param.stride());
1352             }
1353             if (pooling_param.has_pad_w() && pooling_param.has_pad_h())
1354             {
1355                 fprintf(pp, " 3=%d", pooling_param.pad_w());
1356                 fprintf(pp, " 13=%d", pooling_param.pad_h());
1357             }
1358             else
1359             {
1360                 fprintf(pp, " 3=%d", pooling_param.pad());
1361             }
1362             fprintf(pp, " 4=%d", pooling_param.has_global_pooling() ? pooling_param.global_pooling() : 0);
        }
        else if (layer.type() == "Power")
        {
            const caffe::PowerParameter& power_param = layer.power_param();
            fprintf(pp, " 0=%e", power_param.power());
            fprintf(pp, " 1=%e", power_param.scale());
            fprintf(pp, " 2=%e", power_param.shift());
        }
        else if (layer.type() == "PReLU")
        {
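            // param 0 = number of slopes (channel count, or 1 if channel-shared);
            // the raw slope floats are appended to the bin file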
            const caffe::LayerParameter& binlayer = net.layer(netidx);
            const caffe::BlobProto& slope_blob = binlayer.blobs(0);
            fprintf(pp, " 0=%d", slope_blob.data_size());
            fwrite(slope_blob.data().data(), sizeof(float), slope_blob.data_size(), bp);
        }
        else if (layer.type() == "PriorBox")
        {
            const caffe::PriorBoxParameter& prior_box_param = layer.prior_box_param();

            int num_aspect_ratio = prior_box_param.aspect_ratio_size();
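            // an aspect ratio of 1 is implicit in Caffe's PriorBox (those boxes
            // come from min/max size), so explicit 1:1 entries are not counted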
            for (int j = 0; j < prior_box_param.aspect_ratio_size(); j++)
            {
                float ar = prior_box_param.aspect_ratio(j);
                if (fabs(ar - 1.) < 1e-6)
                {
                    num_aspect_ratio--;
                }
            }

            float variances[4] = {0.1f, 0.1f, 0.1f, 0.1f};
            if (prior_box_param.variance_size() == 4)
            {
                variances[0] = prior_box_param.variance(0);
                variances[1] = prior_box_param.variance(1);
                variances[2] = prior_box_param.variance(2);
                variances[3] = prior_box_param.variance(3);
            }
            else if (prior_box_param.variance_size() == 1)
            {
                variances[0] = prior_box_param.variance(0);
                variances[1] = prior_box_param.variance(0);
                variances[2] = prior_box_param.variance(0);
                variances[3] = prior_box_param.variance(0);
            }

            int flip = prior_box_param.has_flip() ? prior_box_param.flip() : 1;
            int clip = prior_box_param.has_clip() ? prior_box_param.clip() : 0;
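            // -233 is the converter's "unset" sentinel; ncnn falls back to the
            // input blob's dimensions when image size and step are left at it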
            int image_width = -233;
            int image_height = -233;
            if (prior_box_param.has_img_size())
            {
                image_width = prior_box_param.img_size();
                image_height = prior_box_param.img_size();
            }
            else if (prior_box_param.has_img_w() && prior_box_param.has_img_h())
            {
                image_width = prior_box_param.img_w();
                image_height = prior_box_param.img_h();
            }

            float step_width = -233;
            float step_height = -233;
            if (prior_box_param.has_step())
            {
                step_width = prior_box_param.step();
                step_height = prior_box_param.step();
            }
            else if (prior_box_param.has_step_w() && prior_box_param.has_step_h())
            {
                step_width = prior_box_param.step_w();
                step_height = prior_box_param.step_h();
            }

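            // array-valued params use negative keys: -23300 - id marks param
            // <id> as an array, written as "count,v0,v1,..."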
            fprintf(pp, " -23300=%d", prior_box_param.min_size_size());
            for (int j = 0; j < prior_box_param.min_size_size(); j++)
            {
                fprintf(pp, ",%e", prior_box_param.min_size(j));
            }
            fprintf(pp, " -23301=%d", prior_box_param.max_size_size());
            for (int j = 0; j < prior_box_param.max_size_size(); j++)
            {
                fprintf(pp, ",%e", prior_box_param.max_size(j));
            }
            fprintf(pp, " -23302=%d", num_aspect_ratio);
            for (int j = 0; j < prior_box_param.aspect_ratio_size(); j++)
            {
                float ar = prior_box_param.aspect_ratio(j);
                if (fabs(ar - 1.) < 1e-6)
                {
                    continue;
                }
                fprintf(pp, ",%e", ar);
            }
            fprintf(pp, " 3=%e", variances[0]);
            fprintf(pp, " 4=%e", variances[1]);
            fprintf(pp, " 5=%e", variances[2]);
            fprintf(pp, " 6=%e", variances[3]);
            fprintf(pp, " 7=%d", flip);
            fprintf(pp, " 8=%d", clip);
            fprintf(pp, " 9=%d", image_width);
            fprintf(pp, " 10=%d", image_height);
            fprintf(pp, " 11=%e", step_width);
            fprintf(pp, " 12=%e", step_height);
            fprintf(pp, " 13=%e", prior_box_param.offset());
        }
        else if (layer.type() == "PSROIPooling")
        {
            const caffe::PSROIPoolingParameter& psroi_pooling_param = layer.psroi_pooling_param();
            fprintf(pp, " 0=%d", psroi_pooling_param.group_size());
            fprintf(pp, " 1=%d", psroi_pooling_param.group_size());
            fprintf(pp, " 2=%e", psroi_pooling_param.spatial_scale());
            fprintf(pp, " 3=%d", psroi_pooling_param.output_dim());
        }
        else if (layer.type() == "Python")
        {
            const caffe::PythonParameter& python_param = layer.python_param();
            std::string python_layer_name = python_param.layer();
            if (python_layer_name == "ProposalLayer")
            {
                int feat_stride = 16;
                sscanf(python_param.param_str().c_str(), "'feat_stride': %d", &feat_stride);

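                // only feat_stride is recovered from param_str; the rest are
                // pinned to py-faster-rcnn's test-time defaults below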
                int base_size = 16;
                //                 float ratio;
                //                 float scale;
                int pre_nms_topN = 6000;
                int after_nms_topN = 300;
                float nms_thresh = 0.7f;
                int min_size = 16;
                fprintf(pp, " 0=%d", feat_stride);
                fprintf(pp, " 1=%d", base_size);
                fprintf(pp, " 2=%d", pre_nms_topN);
                fprintf(pp, " 3=%d", after_nms_topN);
                fprintf(pp, " 4=%e", nms_thresh);
                fprintf(pp, " 5=%d", min_size);
            }
        }
        else if (layer.type() == "ReLU")
        {
            const caffe::ReLUParameter& relu_param = layer.relu_param();
            if (relu_param.has_negative_slope())
            {
                fprintf(pp, " 0=%e", relu_param.negative_slope());
            }
        }
        else if (layer.type() == "ReLU6")
        {
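            // Caffe's ReLU6 carries no parameters; it is expressed as a fixed
            // clamp to [0, 6] via min/max params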
            float min = 0.f;
            float max = 6.f;
            fprintf(pp, " 0=%e", min);
            fprintf(pp, " 1=%e", max);
        }
        else if (layer.type() == "Reorg")
        {
            const caffe::ReorgParameter& reorg_param = layer.reorg_param();
            fprintf(pp, " 0=%d", reorg_param.stride());
        }
        else if (layer.type() == "Reshape")
        {
            const caffe::ReshapeParameter& reshape_param = layer.reshape_param();
            const caffe::BlobShape& bs = reshape_param.shape();
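            // Caffe shapes are outermost-first (N,C,H,W) while the params here
            // are 0=w 1=h 2=c, hence the reversed indices; the batch dim is
            // dropped, -233 marks unset axes, and dims are printed signed
            // because -1 (infer) and 0 are legal Caffe reshape values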
            if (bs.dim_size() == 1)
            {
                fprintf(pp, " 0=%lld 1=-233 2=-233", (long long)bs.dim(0));
            }
            else if (bs.dim_size() == 2)
            {
                fprintf(pp, " 0=%lld 1=-233 2=-233", (long long)bs.dim(1));
            }
            else if (bs.dim_size() == 3)
            {
                fprintf(pp, " 0=%lld 1=%lld 2=-233", (long long)bs.dim(2), (long long)bs.dim(1));
            }
            else // bs.dim_size() == 4
            {
                fprintf(pp, " 0=%lld 1=%lld 2=%lld", (long long)bs.dim(3), (long long)bs.dim(2), (long long)bs.dim(1));
            }
            fprintf(pp, " 3=0"); // permute
        }
        else if (layer.type() == "ROIAlign")
        {
            const caffe::ROIAlignParameter& roi_align_param = layer.roi_align_param();
            fprintf(pp, " 0=%d", roi_align_param.pooled_w());
            fprintf(pp, " 1=%d", roi_align_param.pooled_h());
            fprintf(pp, " 2=%e", roi_align_param.spatial_scale());
            // params 3/4/5 (sampling ratio, aligned mode, version in current
            // ncnn's ROIAlign) are left at their zero defaults
            fprintf(pp, " 3=%d", 0);
            fprintf(pp, " 4=%d", 0);
            fprintf(pp, " 5=%d", 0);
        }
        else if (layer.type() == "ROIPooling")
        {
            const caffe::ROIPoolingParameter& roi_pooling_param = layer.roi_pooling_param();
            fprintf(pp, " 0=%d", roi_pooling_param.pooled_w());
            fprintf(pp, " 1=%d", roi_pooling_param.pooled_h());
            fprintf(pp, " 2=%e", roi_pooling_param.spatial_scale());
        }
        else if (layer.type() == "Scale")
        {
            const caffe::LayerParameter& binlayer = net.layer(netidx);

            const caffe::ScaleParameter& scale_param = layer.scale_param();
            bool scale_weight = scale_param.bias_term() ? (binlayer.blobs_size() == 2) : (binlayer.blobs_size() == 1);
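            // with learned scale data, param 0 is its element count; otherwise
            // 0=-233 tells ncnn the scale arrives as a second bottom blob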
            if (scale_weight)
            {
                const caffe::BlobProto& weight_blob = binlayer.blobs(0);
                fprintf(pp, " 0=%d", int(weight_blob.data_size()));
            }
            else
            {
                fprintf(pp, " 0=-233");
            }

            fprintf(pp, " 1=%d", scale_param.bias_term());

            for (int j = 0; j < binlayer.blobs_size(); j++)
            {
                const caffe::BlobProto& blob = binlayer.blobs(j);
                fwrite(blob.data().data(), sizeof(float), blob.data_size(), bp);
            }
        }
        else if (layer.type() == "ShuffleChannel")
        {
            const caffe::ShuffleChannelParameter& shuffle_channel_param = layer.shuffle_channel_param();
            fprintf(pp, " 0=%d", shuffle_channel_param.group());
        }
        else if (layer.type() == "Slice")
        {
            const caffe::SliceParameter& slice_param = layer.slice_param();
            if (slice_param.slice_point_size() == 0)
            {
                int num_slice = layer.top_size();
                fprintf(pp, " -23300=%d", num_slice);
                for (int j = 0; j < num_slice; j++)
                {
                    fprintf(pp, ",-233");
                }
            }
            else
            {
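                // slice_point holds cumulative offsets; convert them to
                // per-output sizes, the trailing -233 meaning "the remainder"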
                int num_slice = slice_param.slice_point_size() + 1;
                fprintf(pp, " -23300=%d", num_slice);
                int prev_offset = 0;
                for (int j = 0; j < slice_param.slice_point_size(); j++)
                {
                    int offset = slice_param.slice_point(j);
                    fprintf(pp, ",%d", offset - prev_offset);
                    prev_offset = offset;
                }
                fprintf(pp, ",-233");
            }
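            // Caffe axes count the batch dim and ncnn's do not, hence the -1;
            // slice_dim is the legacy spelling of axis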
            int axis = 0;
            if (slice_param.has_axis())
            {
                axis = slice_param.axis() - 1;
            }
            else if (slice_param.has_slice_dim())
            {
                axis = slice_param.slice_dim() - 1;
            }
            fprintf(pp, " 1=%d", axis);
        }
        else if (layer.type() == "Softmax")
        {
            const caffe::SoftmaxParameter& softmax_param = layer.softmax_param();
            int dim = softmax_param.axis() - 1;
            fprintf(pp, " 0=%d", dim);
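            // " 1=1" opts into the corrected axis handling (the fixbug0 flag
            // in current ncnn's Softmax)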
            fprintf(pp, " 1=1");
        }
        else if (layer.type() == "Threshold")
        {
            const caffe::ThresholdParameter& threshold_param = layer.threshold_param();
            fprintf(pp, " 0=%e", threshold_param.threshold());
        }
        else if (layer.type() == "YoloDetectionOutput")
        {
            const caffe::YoloDetectionOutputParameter& yolo_detection_output_param = layer.yolo_detection_output_param();

            fprintf(pp, " 0=%d", yolo_detection_output_param.num_classes());
            fprintf(pp, " 1=%d", yolo_detection_output_param.num_box());
            fprintf(pp, " 2=%e", yolo_detection_output_param.confidence_threshold());
            fprintf(pp, " 3=%e", yolo_detection_output_param.nms_threshold());

            int num_bias = yolo_detection_output_param.biases_size();
            fprintf(pp, " -23304=%d", num_bias);
            for (int j = 0; j < num_bias; j++)
            {
                fprintf(pp, ",%e", yolo_detection_output_param.biases(j));
            }
        }
        else if (layer.type() == "Yolov3DetectionOutput")
        {
            const caffe::Yolov3DetectionOutputParameter& yolov3_detection_output_param = layer.yolov3_detection_output_param();

            fprintf(pp, " 0=%d", yolov3_detection_output_param.num_classes());
            fprintf(pp, " 1=%d", yolov3_detection_output_param.num_box());
            fprintf(pp, " 2=%e", yolov3_detection_output_param.confidence_threshold());
            fprintf(pp, " 3=%e", yolov3_detection_output_param.nms_threshold());

            int num_bias = yolov3_detection_output_param.biases_size();
            fprintf(pp, " -23304=%d", num_bias);
            for (int j = 0; j < num_bias; j++)
            {
                fprintf(pp, ",%e", yolov3_detection_output_param.biases(j));
            }
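            // masks and anchor scales are integral in the prototxt, but ncnn
            // param arrays are parsed as floats, hence the casts below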
            int num_mask = yolov3_detection_output_param.mask_size();
            fprintf(pp, " -23305=%d", num_mask);
            for (int j = 0; j < num_mask; j++)
            {
                fprintf(pp, ",%e", (float)yolov3_detection_output_param.mask(j));
            }
            int num_anchors = yolov3_detection_output_param.anchors_scale_size();
            fprintf(pp, " -23306=%d", num_anchors);
            for (int j = 0; j < num_anchors; j++)
            {
                fprintf(pp, ",%e", (float)yolov3_detection_output_param.anchors_scale(j));
            }
            fprintf(pp, " 7=%d", yolov3_detection_output_param.mask_group_num());
        }
        fprintf(pp, "\n");

        // add a Split layer when a top blob is referenced more than once
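        // ncnn expects each blob to feed exactly one consumer, so a blob read
        // by several layers is fanned out through a Split whose outputs are
        // named <blob>_splitncnn_<k>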
        if (layer.bottom_size() == 1 && layer.top_size() == 1 && layer.bottom(0) == layer.top(0))
        {
            std::string blob_name = blob_name_decorated[layer.top(0)];
            if (bottom_reference.find(blob_name) != bottom_reference.end())
            {
                int refcount = bottom_reference[blob_name];
                if (refcount > 1)
                {
                    char splitname[256];
                    sprintf(splitname, "splitncnn_%d", internal_split);
                    fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
                    fprintf(pp, " %s", blob_name.c_str());

                    for (int j = 0; j < refcount; j++)
                    {
                        fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), j);
                    }
                    fprintf(pp, "\n");

                    internal_split++;
                }
            }
        }
        else
        {
            for (int j = 0; j < layer.top_size(); j++)
            {
                std::string blob_name = layer.top(j);
                if (bottom_reference.find(blob_name) != bottom_reference.end())
                {
                    int refcount = bottom_reference[blob_name];
                    if (refcount > 1)
                    {
                        char splitname[256];
                        sprintf(splitname, "splitncnn_%d", internal_split);
                        fprintf(pp, "%-16s %-16s %d %d", "Split", splitname, 1, refcount);
                        fprintf(pp, " %s", blob_name.c_str());

                        for (int k = 0; k < refcount; k++)
                        {
                            fprintf(pp, " %s_splitncnn_%d", blob_name.c_str(), k);
                        }
                        fprintf(pp, "\n");

                        internal_split++;
                    }
                }
            }
        }
    }

    fclose(pp);
    fclose(bp);

    return 0;
}
