syntax = "proto2";

package caffe;

// Specifies the shape (dimensions) of a Blob.
message BlobShape {
  repeated int64 dim = 1 [packed = true];
}

message BlobProto {
  optional BlobShape shape = 7;
  repeated float data = 5 [packed = true];
  repeated float diff = 6 [packed = true];
  repeated double double_data = 8 [packed = true];
  repeated double double_diff = 9 [packed = true];

  // 4D dimensions -- deprecated.  Use "shape" instead.
  optional int32 num = 1 [default = 0];
  optional int32 channels = 2 [default = 0];
  optional int32 height = 3 [default = 0];
  optional int32 width = 4 [default = 0];
}
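
// Illustrative example (protobuf text format): a blob holding a batch of 64
// RGB 224x224 images would be described with the "shape" field as
//   shape { dim: 64 dim: 3 dim: 224 dim: 224 }
// rather than with the deprecated num/channels/height/width fields.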

// The BlobProtoVector is simply a way to pass multiple BlobProto instances
// around.
message BlobProtoVector {
  repeated BlobProto blobs = 1;
}

message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  optional int32 label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true, data contains an encoded image that needs to be decoded.
  optional bool encoded = 7 [default = false];
  repeated int32 labels = 8;
  optional float float_label = 9;
  repeated float float_labels = 10;
  optional bytes byte_labels = 11;
}

message MTCNNBBox {
  optional float xmin = 1;
  optional float ymin = 2;
  optional float xmax = 3;
  optional float ymax = 4;
}

message MTCNNDatum {
  optional Datum datum = 1;
  optional MTCNNBBox roi = 2;
  repeated float pts = 3;
  //optional int32 labels_size = 4 [default = 0];
}

// The label (display) name and label id.
message LabelMapItem {
  // Both name and label are required.
  optional string name = 1;
  optional int32 label = 2;
  // display_name is optional.
  optional string display_name = 3;
}

message LabelMap {
  repeated LabelMapItem item = 1;
}

// Sample a bbox in the normalized space [0, 1] with provided constraints.
message Sampler {
  // Minimum scale of the sampled bbox.
  optional float min_scale = 1 [default = 1.];
  // Maximum scale of the sampled bbox.
  optional float max_scale = 2 [default = 1.];

  // Minimum aspect ratio of the sampled bbox.
  optional float min_aspect_ratio = 3 [default = 1.];
  // Maximum aspect ratio of the sampled bbox.
  optional float max_aspect_ratio = 4 [default = 1.];
}

// Constraints for selecting sampled bbox.
message SampleConstraint {
  // Minimum Jaccard overlap between sampled bbox and all bboxes in
  // AnnotationGroup.
  optional float min_jaccard_overlap = 1;
  // Maximum Jaccard overlap between sampled bbox and all bboxes in
  // AnnotationGroup.
  optional float max_jaccard_overlap = 2;

  // Minimum coverage of sampled bbox by all bboxes in AnnotationGroup.
  optional float min_sample_coverage = 3;
  // Maximum coverage of sampled bbox by all bboxes in AnnotationGroup.
  optional float max_sample_coverage = 4;

  // Minimum coverage of all bboxes in AnnotationGroup by sampled bbox.
  optional float min_object_coverage = 5;
  // Maximum coverage of all bboxes in AnnotationGroup by sampled bbox.
  optional float max_object_coverage = 6;
}

// Sample a batch of bboxes with provided constraints.
message BatchSampler {
  // Use the original image as the source for sampling.
  optional bool use_original_image = 1 [default = true];

  // Constraints for sampling bbox.
  optional Sampler sampler = 2;

  // Constraints for determining if a sampled bbox is positive or negative.
  optional SampleConstraint sample_constraint = 3;

  // If provided, stop once the given number of samples satisfying the
  // sample_constraint has been found.
  optional uint32 max_sample = 4;

  // Maximum number of trials for sampling to avoid an infinite loop.
  optional uint32 max_trials = 5 [default = 100];
}
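
// Illustrative prototxt sketch (hypothetical values, in the style of
// SSD-like data augmentation): sample a patch with at least 0.5 Jaccard
// overlap with some ground-truth bbox, giving up after 50 trials:
//   batch_sampler {
//     sampler {
//       min_scale: 0.3  max_scale: 1.0
//       min_aspect_ratio: 0.5  max_aspect_ratio: 2.0
//     }
//     sample_constraint { min_jaccard_overlap: 0.5 }
//     max_sample: 1
//     max_trials: 50
//   }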

// Condition for emitting annotations.
message EmitConstraint {
  enum EmitType {
    CENTER = 0;
    MIN_OVERLAP = 1;
  }
  optional EmitType emit_type = 1 [default = CENTER];
  // If emit_type is MIN_OVERLAP, provide the emit_overlap.
  optional float emit_overlap = 2;
}

// The normalized bounding box [0, 1] w.r.t. the input image size.
message NormalizedBBox {
  optional float xmin = 1;
  optional float ymin = 2;
  optional float xmax = 3;
  optional float ymax = 4;
  optional int32 label = 5;
  optional bool difficult = 6;
  optional float score = 7;
  optional float size = 8;
}

// Annotation for each object instance.
message Annotation {
  optional int32 instance_id = 1 [default = 0];
  optional NormalizedBBox bbox = 2;
}

// Group of annotations for a particular label.
message AnnotationGroup {
  optional int32 group_label = 1;
  repeated Annotation annotation = 2;
}

// An extension of Datum which contains "rich" annotations.
message AnnotatedDatum {
  enum AnnotationType {
    BBOX = 0;
  }
  optional Datum datum = 1;
  // If there are "rich" annotations, specify the type of annotation.
  // Currently it only supports bounding box.
  // If there are no "rich" annotations, use label in datum instead.
  optional AnnotationType type = 2;
  // Each group contains annotation for a particular class.
  repeated AnnotationGroup annotation_group = 3;
}

message FillerParameter {
  // The filler type.
  optional string type = 1 [default = 'constant'];
  optional float value = 2 [default = 0]; // the value in constant filler
  optional float min = 3 [default = 0]; // the min value in uniform filler
  optional float max = 4 [default = 1]; // the max value in uniform filler
  optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
  optional float std = 6 [default = 1]; // the std value in Gaussian filler
  // The expected number of non-zero output weights for a given input in
  // Gaussian filler -- the default -1 means don't perform sparsification.
  optional int32 sparse = 7 [default = -1];
  // Normalize the filler variance by fan_in, fan_out, or their average.
  // Applies to 'xavier' and 'msra' fillers.
  enum VarianceNorm {
    FAN_IN = 0;
    FAN_OUT = 1;
    AVERAGE = 2;
  }
  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}
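
// Illustrative filler configurations (protobuf text format); values are
// only examples:
//   weight_filler { type: "xavier" }             (variance scaled by fan_in)
//   weight_filler { type: "gaussian" std: 0.01 } (zero-mean Gaussian)
//   bias_filler   { type: "constant" value: 0 }  (all zeros)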

message NetParameter {
  optional string name = 1; // consider giving the network a name
  // DEPRECATED. See InputParameter. The input blobs to the network.
  repeated string input = 3;
  // DEPRECATED. See InputParameter. The shape of the input blobs.
  repeated BlobShape input_shape = 8;

  // 4D input dimensions -- deprecated.  Use "input_shape" instead.
  // If specified, for each input blob there should be four
  // values specifying the num, channels, height and width of the input blob.
  // Thus, there should be a total of (4 * #input) numbers.
  repeated int32 input_dim = 4;

  // Whether the network forces every layer to carry out the backward
  // operation. If set to false, whether to carry out backward is determined
  // automatically according to the net structure and learning rates.
  optional bool force_backward = 5 [default = false];
  // The current "state" of the network, including the phase, level, and stage.
  // Some layers may be included/excluded depending on this state and the states
  // specified in the layers' include and exclude fields.
  optional NetState state = 6;

  // Print debugging information about results while running Net::Forward,
  // Net::Backward, and Net::Update.
  optional bool debug_info = 7 [default = false];

  // The layers that make up the net.  Each of their configurations, including
  // connectivity and behavior, is specified as a LayerParameter.
  repeated LayerParameter layer = 100;  // ID 100 so layers are printed last.

  // DEPRECATED: use 'layer' instead.
  repeated V1LayerParameter layers = 2;
}
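
// Illustrative minimal net definition (prototxt); the layer types shown are
// standard Caffe layers and the shapes are hypothetical:
//   name: "example_net"
//   layer {
//     name: "data"  type: "Input"  top: "data"
//     input_param { shape { dim: 1 dim: 3 dim: 224 dim: 224 } }
//   }
//   layer {
//     name: "fc"  type: "InnerProduct"  bottom: "data"  top: "fc"
//     inner_product_param { num_output: 10 }
//   }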

// NOTE
// Update the next available ID when you add a new SolverParameter field.
//
// SolverParameter next available ID: 46 (last added: layer_wise_reduce)
message SolverParameter {
  //////////////////////////////////////////////////////////////////////////////
  // Specifying the train and test networks
  //
  // Exactly one train net must be specified using one of the following fields:
  //     train_net_param, train_net, net_param, net
  // One or more test nets may be specified using any of the following fields:
  //     test_net_param, test_net, net_param, net
  // If more than one test net field is specified (e.g., both net and
  // test_net are specified), they will be evaluated in the field order given
  // above: (1) test_net_param, (2) test_net, (3) net_param/net.
  // A test_iter must be specified for each test_net.
  // A test_level and/or a test_stage may also be specified for each test_net.
  //////////////////////////////////////////////////////////////////////////////

  // Proto filename for the train net, possibly combined with one or more
  // test nets.
  optional string net = 24;
  // Inline train net param, possibly combined with one or more test nets.
  optional NetParameter net_param = 25;

  optional string train_net = 1; // Proto filename for the train net.
  repeated string test_net = 2; // Proto filenames for the test nets.
  optional NetParameter train_net_param = 21; // Inline train net params.
  repeated NetParameter test_net_param = 22; // Inline test net params.

  // The states for the train/test nets. Must be unspecified or
  // specified once per net.
  //
  // By default, train_state will have phase = TRAIN,
  // and all test_state's will have phase = TEST.
  // Other defaults are set according to the NetState defaults.
  optional NetState train_state = 26;
  repeated NetState test_state = 27;

  // Evaluation type.
  optional string eval_type = 41 [default = "classification"];
  // ap_version: different ways of computing Average Precision.
  //    Check https://sanchom.wordpress.com/tag/average-precision/ for details.
  //    11point: the 11-point interpolated average precision. Used in VOC2007.
  //    MaxIntegral: maximally interpolated AP. Used in VOC2012/ILSVRC.
  //    Integral: the natural integral of the precision-recall curve.
  optional string ap_version = 42 [default = "Integral"];
  // If true, display the per-class result.
  optional bool show_per_class_result = 44 [default = false];

  // The number of iterations for each test net.
  repeated int32 test_iter = 3;

  // The number of iterations between two testing phases.
  optional int32 test_interval = 4 [default = 0];
  optional bool test_compute_loss = 19 [default = false];
  // If true, run an initial test pass before the first iteration,
  // ensuring memory availability and printing the starting value of the loss.
  optional bool test_initialization = 32 [default = true];
  optional float base_lr = 5; // The base learning rate
  // the number of iterations between displaying info. If display = 0, no info
  // will be displayed.
  optional int32 display = 6;
  // Display the loss averaged over the last average_loss iterations
  optional int32 average_loss = 33 [default = 1];
  optional int32 max_iter = 7; // the maximum number of iterations
  // accumulate gradients over `iter_size` x `batch_size` instances
  optional int32 iter_size = 36 [default = 1];

  // The learning rate decay policy. The currently implemented learning rate
  // policies are as follows:
  //    - fixed: always return base_lr.
  //    - step: return base_lr * gamma ^ floor(iter / stepsize)
  //    - exp: return base_lr * gamma ^ iter
  //    - inv: return base_lr * (1 + gamma * iter) ^ (-power)
  //    - multistep: similar to step, but allows non-uniform steps defined by
  //      stepvalue
  //    - poly: the effective learning rate follows a polynomial decay, to be
  //      zero by max_iter: return base_lr * (1 - iter/max_iter) ^ power
  //    - sigmoid: the effective learning rate follows a sigmoid decay:
  //      return base_lr * (1 / (1 + exp(-gamma * (iter - stepsize))))
  //
  // where base_lr, max_iter, gamma, stepsize, stepvalue and power are defined
  // in the solver parameter protocol buffer, and iter is the current iteration.
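  //
  // Worked example for the "step" policy: with base_lr = 0.01, gamma = 0.1,
  // and stepsize = 10000, the learning rate at iteration 25000 is
  //   0.01 * 0.1 ^ floor(25000 / 10000) = 0.01 * 0.01 = 1e-4.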
  optional string lr_policy = 8;
  optional float gamma = 9; // The parameter to compute the learning rate.
  optional float power = 10; // The parameter to compute the learning rate.
  optional float momentum = 11; // The momentum value.
  optional float weight_decay = 12; // The weight decay.
  // regularization types supported: L1 and L2
  // controlled by weight_decay
  optional string regularization_type = 29 [default = "L2"];
  // the stepsize for learning rate policy "step"
  optional int32 stepsize = 13;
  // the stepvalues for learning rate policy "multistep"
  repeated int32 stepvalue = 34;

  // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
  // whenever their actual L2 norm is larger.
  optional float clip_gradients = 35 [default = -1];

  optional int32 snapshot = 14 [default = 0]; // The snapshot interval
  optional string snapshot_prefix = 15; // The prefix for the snapshot.
  // Whether to snapshot diff in the results or not. Snapshotting diff will
  // help debugging, but the final protocol buffer size will be much larger.
  optional bool snapshot_diff = 16 [default = false];
  enum SnapshotFormat {
    HDF5 = 0;
    BINARYPROTO = 1;
  }
  optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
  // the mode the solver will use: 0 for CPU and 1 for GPU. GPU is the default.
  enum SolverMode {
    CPU = 0;
    GPU = 1;
  }
  optional SolverMode solver_mode = 17 [default = GPU];
  // the device_id that will be used in GPU mode. Defaults to device_id = 0.
  optional int32 device_id = 18 [default = 0];
  // If non-negative, the seed with which the Solver will initialize the Caffe
  // random number generator -- useful for reproducible results. Otherwise,
  // (and by default) initialize using a seed derived from the system clock.
  optional int64 random_seed = 20 [default = -1];

  // type of the solver
  optional string type = 40 [default = "SGD"];

  // numerical stability for RMSProp, AdaGrad, AdaDelta, and Adam
  optional float delta = 31 [default = 1e-8];
  // parameters for the Adam solver
  optional float momentum2 = 39 [default = 0.999];

  // RMSProp decay value
  // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
  optional float rms_decay = 38 [default = 0.99];

  // If true, print information about the state of the net that may help with
  // debugging learning problems.
  optional bool debug_info = 23 [default = false];

  // If false, don't save a snapshot after training finishes.
  optional bool snapshot_after_train = 28 [default = true];

  // DEPRECATED: old solver enum types, use string instead
  enum SolverType {
    SGD = 0;
    NESTEROV = 1;
    ADAGRAD = 2;
    RMSPROP = 3;
    ADADELTA = 4;
    ADAM = 5;
  }
  // DEPRECATED: use type instead of solver_type
  optional SolverType solver_type = 30 [default = SGD];

  // Overlap compute and communication for data parallel training
  optional bool layer_wise_reduce = 45 [default = true];
}
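
// Illustrative solver.prototxt sketch (hypothetical paths and values):
//   net: "train_val.prototxt"
//   test_iter: 100
//   test_interval: 1000
//   base_lr: 0.01
//   lr_policy: "step"
//   gamma: 0.1
//   stepsize: 10000
//   momentum: 0.9
//   weight_decay: 0.0005
//   max_iter: 45000
//   snapshot: 5000
//   snapshot_prefix: "snapshots/example"
//   solver_mode: GPU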

// A message that stores the solver snapshots
message SolverState {
  optional int32 iter = 1; // The current iteration
  optional string learned_net = 2; // The file that stores the learned net.
  repeated BlobProto history = 3; // The history for sgd solvers
  optional int32 current_step = 4 [default = 0]; // The current step for learning rate
}

enum Phase {
  TRAIN = 0;
  TEST = 1;
  QUAN = 2;
  INT8 = 3;
}

message NetState {
  optional Phase phase = 1 [default = TEST];
  optional int32 level = 2 [default = 0];
  repeated string stage = 3;
}

message NetStateRule {
  // Set phase to require the NetState have a particular phase (TRAIN or TEST)
  // to meet this rule.
  optional Phase phase = 1;

  // Set the minimum and/or maximum levels in which the layer should be used.
  // Leave undefined to meet the rule regardless of level.
  optional int32 min_level = 2;
  optional int32 max_level = 3;

  // Customizable sets of stages to include or exclude.
  // The net must have ALL of the specified stages and NONE of the specified
  // "not_stage"s to meet the rule.
  // (Use multiple NetStateRules to specify conjunctions of stages.)
  repeated string stage = 4;
  repeated string not_stage = 5;
}
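
// Illustrative usage (prototxt): restrict a layer to a phase by giving it an
// include rule; here an Accuracy layer that only runs in TEST nets:
//   layer {
//     name: "accuracy"  type: "Accuracy"
//     bottom: "fc"  bottom: "label"  top: "accuracy"
//     include { phase: TEST }
//   }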

// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
message ParamSpec {
  // The names of the parameter blobs -- useful for sharing parameters among
  // layers, but never required otherwise.  To share a parameter between two
  // layers, give it a (non-empty) name.
  optional string name = 1;

  // Whether to require shared weights to have the same shape, or just the same
  // count -- defaults to STRICT if unspecified.
  optional DimCheckMode share_mode = 2;
  enum DimCheckMode {
    // STRICT (default) requires that num, channels, height, width each match.
    STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to match.
    PERMISSIVE = 1;
  }

  // The multiplier on the global learning rate for this parameter.
  optional float lr_mult = 3 [default = 1.0];

  // The multiplier on the global weight decay for this parameter.
  optional float decay_mult = 4 [default = 1.0];
}
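
// Illustrative usage (prototxt): a common convention gives the bias twice
// the learning rate of the weights and exempts it from weight decay:
//   param { lr_mult: 1  decay_mult: 1 }  # weights
//   param { lr_mult: 2  decay_mult: 0 }  # bias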

// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 149 (last added: Pooling3DParameter)
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob

  // The train / test phase for computation.
  optional Phase phase = 10;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 5;

  // Specifies training parameters (multipliers on global learning constants,
  // and the name and other settings used for weight sharing).
  repeated ParamSpec param = 6;

  // The blobs containing the numeric parameters of the layer.
  repeated BlobProto blobs = 7;

  // Specifies whether to backpropagate to each bottom. If unspecified,
  // Caffe will automatically infer whether each input needs backpropagation
  // to compute parameter gradients. If set to true for some inputs,
  // backpropagation to those inputs is forced; if set false for some inputs,
  // backpropagation to those inputs is skipped.
  //
  // The size must be either 0 or equal to the number of bottoms.
  repeated bool propagate_down = 11;

  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState.  You may specify a non-zero number of rules
  // to include OR exclude, but not both.  If no include or exclude rules are
  // specified, the layer is always included.  If the current NetState meets
  // ANY (i.e., one or more) of the specified rules, the layer is
  // included/excluded.
  repeated NetStateRule include = 8;
  repeated NetStateRule exclude = 9;

  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 100;

  // Parameters shared by loss layers.
  optional LossParameter loss_param = 101;

  // Layer type-specific parameters.
  //
  // Note: certain layers may have more than one computational engine
  // for their implementation. These layers include an Engine type and
  // engine parameter for selecting the implementation.
  // The default for the engine is set by the ENGINE switch at compile-time.
  optional AccuracyParameter accuracy_param = 102;
  optional AnnotatedDataParameter annotated_data_param = 200;
  optional ArgMaxParameter argmax_param = 103;
  optional BatchNormParameter batch_norm_param = 139;
  optional BiasParameter bias_param = 141;
  optional ConcatParameter concat_param = 104;
  optional ContrastiveLossParameter contrastive_loss_param = 105;
  optional ConvolutionParameter convolution_param = 106;
  optional Convolution3DParameter convolution3d_param = 147;
  optional CropParameter crop_param = 144;
  optional DataParameter data_param = 107;
  optional DetectionEvaluateParameter detection_evaluate_param = 205;
  optional DetectionOutputParameter detection_output_param = 204;
  optional DropoutParameter dropout_param = 108;
  optional DummyDataParameter dummy_data_param = 109;
  optional EltwiseParameter eltwise_param = 110;
  optional ELUParameter elu_param = 140;
  optional EmbedParameter embed_param = 137;
  optional ExpParameter exp_param = 111;
  optional FlattenParameter flatten_param = 135;
  optional HDF5DataParameter hdf5_data_param = 112;
  optional HDF5OutputParameter hdf5_output_param = 113;
  optional HingeLossParameter hinge_loss_param = 114;
  optional ImageDataParameter image_data_param = 115;
  optional InfogainLossParameter infogain_loss_param = 116;
  optional InnerProductParameter inner_product_param = 117;
  optional InputParameter input_param = 143;
  optional LogParameter log_param = 134;
  optional LRNParameter lrn_param = 118;
  optional LSTMParameter lstm_param = 210;
  optional Im2seqParameter im2seq_param = 207;
  optional Seq2outParameter seq2out_param = 208;
  optional ImgSizeParameter img_size_param = 209;
  optional MemoryDataParameter memory_data_param = 119;
  optional MultiBoxLossParameter multibox_loss_param = 201;
  optional MVNParameter mvn_param = 120;
  optional NormalizeParameter norm_param = 206;
  optional ParameterParameter parameter_param = 145;
  optional PermuteParameter permute_param = 202;
  optional PoolingParameter pooling_param = 121;
  optional Pooling3DParameter pooling3d_param = 148;
  optional PowerParameter power_param = 122;
  optional PReLUParameter prelu_param = 131;
  optional PriorBoxParameter prior_box_param = 203;
  optional PythonParameter python_param = 130;
  optional RecurrentParameter recurrent_param = 146;
  optional ReductionParameter reduction_param = 136;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 133;
  optional ScaleParameter scale_param = 142;
  optional SigmoidParameter sigmoid_param = 124;
  optional SoftmaxParameter softmax_param = 125;
  optional SPPParameter spp_param = 132;
  optional SliceParameter slice_param = 126;
  optional TanHParameter tanh_param = 127;
  optional ThresholdParameter threshold_param = 128;
  optional TileParameter tile_param = 138;
  optional WindowDataParameter window_data_param = 129;
  optional CenterLossParameter center_loss_param = 211;
  optional ShuffleChannelParameter shuffle_channel_param = 212;

  optional InterpParameter interp_param = 2210;
  optional ROIPoolingParameter roi_pooling_param = 2201;
  optional ClipParameter clip_param = 2202;
  optional UpsampleParameter upsample_param = 2203;
}
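
// Illustrative layer definition (prototxt) combining the common fields above
// with one type-specific parameter message; values are hypothetical:
//   layer {
//     name: "conv1"
//     type: "Convolution"
//     bottom: "data"
//     top: "conv1"
//     param { lr_mult: 1 }
//     param { lr_mult: 2 }
//     convolution_param {
//       num_output: 64  kernel_size: 3  pad: 1  stride: 1
//       weight_filler { type: "xavier" }
//     }
//   }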

// Message that stores parameters used by ClipLayer
message ClipParameter {
  required float min = 1;
  required float max = 2;
}

// Message that stores parameters used by ROIPoolingLayer
message ROIPoolingParameter {
  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in height and width or as Y, X pairs.
  optional uint32 pooled_h = 1 [default = 0]; // The pooled output height
  optional uint32 pooled_w = 2 [default = 0]; // The pooled output width
  // Multiplicative spatial scale factor to translate ROI coords from their
  // input scale to the scale used when pooling
  optional float spatial_scale = 3 [default = 1];
}

message InterpParameter {
  optional int32 height = 1 [default = 0]; // Height of output
  optional int32 width = 2 [default = 0]; // Width of output
  optional int32 zoom_factor = 3 [default = 1]; // zoom factor
  optional int32 shrink_factor = 4 [default = 1]; // shrink factor
  optional int32 pad_beg = 5 [default = 0]; // padding at beginning of input
  optional int32 pad_end = 6 [default = 0]; // padding at end of input
}

// Message that stores parameters used by the LSTM layer
message LSTMParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional float clipping_threshold = 2 [default = 0.0];
  optional FillerParameter weight_filler = 3; // The filler for the weight
  optional FillerParameter bias_filler = 4; // The filler for the bias
  optional uint32 batch_size = 5 [default = 1];
  optional uint32 paramq = 6;
  optional float scale_in = 7;
  optional float scale_out = 8;
  optional int32 a_min = 9 [default = -128];
  optional int32 a_max = 10 [default = 127];
}

// Message that stores parameters used by the Im2seq layer
message Im2seqParameter {
  enum Direction {
    FORWARD = 0;
    REVERSED = 1;
    TOPDOWN = 2;
    BOTTOMUP = 3;
  }
  optional Direction direction = 1 [default = FORWARD];
  optional uint32 size = 2 [default = 1];
  optional uint32 stride = 3 [default = 1];
  optional bool line2time_loss = 4 [default = false];
}

message CenterLossParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional FillerParameter center_filler = 2; // The filler for the centers
  // The first axis to be lumped into a single inner product computation;
  // all preceding axes are retained in the output.
  // May be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 3 [default = 1];
  optional string distance_type = 29 [default = "L2"];
}

// Message that stores parameters used by the Seq2out layer
message Seq2outParameter {
}

// Message that stores parameters used for variable-size images
message ImgSizeParameter {
  optional float x_scaling = 1 [default = 1];
  optional float y_scaling = 2 [default = 1];
  // Parameters used to restore pretrained layers (ocr team defined)
  optional string pretrained_file = 500;
  optional string pretrained_layer_name = 501;
  optional string pretrained_layer_bin = 502;
}

// Message that stores parameters used to apply transformation
// to the data layer's data
message TransformationParameter {
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 1 [default = 1];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 2 [default = false];
  // Specify if we would like to randomly crop an image.
  optional uint32 crop_size = 3 [default = 0];
  optional uint32 crop_h = 11 [default = 0];
  optional uint32 crop_w = 12 [default = 0];

  // mean_file and mean_value cannot be specified at the same time
  optional string mean_file = 4;
  // if specified, can be repeated once (would subtract it from all the
  // channels) or can be repeated the same number of times as channels
  // (would subtract them from the corresponding channel)
  repeated float mean_value = 5;
  // Force the decoded image to have 3 color channels.
  optional bool force_color = 6 [default = false];
  // Force the decoded image to have 1 color channel.
  optional bool force_gray = 7 [default = false];
  // Resize policy
  optional ResizeParameter resize_param = 8;
  // Noise policy
  optional NoiseParameter noise_param = 9;
  // Distortion policy
  optional DistortionParameter distort_param = 13;
  // Expand policy
  optional ExpansionParameter expand_param = 14;
  // Constraint for emitting the annotation after transformation.
  optional EmitConstraint emit_constraint = 10;
}
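
// Illustrative transform_param sketch (hypothetical values): subtract a
// per-channel mean, then scale, with random mirroring and 224x224 crops:
//   transform_param {
//     mirror: true
//     crop_size: 224
//     mean_value: 104  mean_value: 117  mean_value: 123
//     scale: 0.017
//   }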

// Message that stores parameters used by the data transformer for the resize
// policy
message ResizeParameter {
  // Probability of using this resize policy
  optional float prob = 1 [default = 1];

  enum Resize_mode {
    WARP = 1;
    FIT_SMALL_SIZE = 2;
    FIT_LARGE_SIZE_AND_PAD = 3;
  }
  optional Resize_mode resize_mode = 2 [default = WARP];
  optional uint32 height = 3 [default = 0];
  optional uint32 width = 4 [default = 0];
  // A parameter used to update bbox in FIT_SMALL_SIZE mode.
  optional uint32 height_scale = 8 [default = 0];
  optional uint32 width_scale = 9 [default = 0];

  enum Pad_mode {
    CONSTANT = 1;
    MIRRORED = 2;
    REPEAT_NEAREST = 3;
  }
  // Padding mode for the FIT_LARGE_SIZE_AND_PAD mode and object centering
  optional Pad_mode pad_mode = 5 [default = CONSTANT];
  // if specified, can be repeated once (would fill all the channels)
  // or can be repeated the same number of times as channels
  // (would use them for the corresponding channels)
  repeated float pad_value = 6;

  enum Interp_mode { // Same as in OpenCV
    LINEAR = 1;
    AREA = 2;
    NEAREST = 3;
    CUBIC = 4;
    LANCZOS4 = 5;
  }
  // interpolation for resizing
  repeated Interp_mode interp_mode = 7;
}
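
// Illustrative resize_param sketch (hypothetical values): always warp the
// image to 300x300; when several interp_mode values are listed, SSD-style
// implementations typically pick one at random per image:
//   resize_param {
//     prob: 1.0
//     resize_mode: WARP
//     height: 300  width: 300
//     interp_mode: LINEAR  interp_mode: AREA  interp_mode: NEAREST
//   }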

message SaltPepperParameter {
  // Percentage of pixels
  optional float fraction = 1 [default = 0];
  repeated float value = 2;
}

// Message that stores parameters used by the data transformer for the noise
// policy
message NoiseParameter {
  // Probability of using this noise policy
  optional float prob = 1 [default = 0];
  // Histogram equalization
  optional bool hist_eq = 2 [default = false];
  // Color inversion
  optional bool inverse = 3 [default = false];
  // Grayscale
  optional bool decolorize = 4 [default = false];
  // Gaussian blur
  optional bool gauss_blur = 5 [default = false];

  // JPEG compression quality (-1 = no compression)
  optional float jpeg = 6 [default = -1];

  // Posterization
  optional bool posterize = 7 [default = false];

  // Erosion
  optional bool erode = 8 [default = false];

  // Salt-and-pepper noise
  optional bool saltpepper = 9 [default = false];

  optional SaltPepperParameter saltpepper_param = 10;

  // Local histogram equalization
  optional bool clahe = 11 [default = false];

  // Color space conversion
  optional bool convert_to_hsv = 12 [default = false];

  // Color space conversion
  optional bool convert_to_lab = 13 [default = false];
}

// Message that stores parameters used by the data transformer for the
// distortion policy
message DistortionParameter {
  // The probability of adjusting brightness.
  optional float brightness_prob = 1 [default = 0.0];
  // Amount to add to the pixel values within [-delta, delta].
  // The possible value is within [0, 255]. Recommend 32.
  optional float brightness_delta = 2 [default = 0.0];

  // The probability of adjusting contrast.
  optional float contrast_prob = 3 [default = 0.0];
  // Lower bound for the random contrast factor. Recommend 0.5.
  optional float contrast_lower = 4 [default = 0.0];
  // Upper bound for the random contrast factor. Recommend 1.5.
  optional float contrast_upper = 5 [default = 0.0];

  // The probability of adjusting hue.
  optional float hue_prob = 6 [default = 0.0];
  // Amount to add to the hue channel within [-delta, delta].
  // The possible value is within [0, 180]. Recommend 36.
  optional float hue_delta = 7 [default = 0.0];

  // The probability of adjusting saturation.
  optional float saturation_prob = 8 [default = 0.0];
  // Lower bound for the random saturation factor. Recommend 0.5.
  optional float saturation_lower = 9 [default = 0.0];
  // Upper bound for the random saturation factor. Recommend 1.5.
  optional float saturation_upper = 10 [default = 0.0];

  // The probability of randomly reordering the image channels.
  optional float random_order_prob = 11 [default = 0.0];
}

// Message that stores parameters used by the data transformer for the
// expansion policy
message ExpansionParameter {
  // Probability of using this expansion policy
  optional float prob = 1 [default = 1];

  // The maximum ratio by which to expand the image.
  optional float max_expand_ratio = 2 [default = 1.];
}

// Message that stores parameters shared by loss layers
message LossParameter {
  // If specified, ignore instances with the given label.
  optional int32 ignore_label = 1;
  // How to normalize the loss for loss layers that aggregate across batches,
  // spatial dimensions, or other dimensions.  Currently only implemented in
  // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers.
  enum NormalizationMode {
    // Divide by the number of examples in the batch times spatial dimensions.
    // Outputs that receive the ignore label will NOT be ignored in computing
    // the normalization factor.
    FULL = 0;
    // Divide by the total number of output locations that do not take the
    // ignore_label.  If ignore_label is not set, this behaves like FULL.
    VALID = 1;
    // Divide by the batch size.
    BATCH_SIZE = 2;
    // Do not normalize the loss.
    NONE = 3;
  }
  // For historical reasons, the default normalization for
  // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID.
  optional NormalizationMode normalization = 3 [default = VALID];
  // Deprecated.  Ignored if normalization is specified.  If normalization
  // is not specified, then setting this to false will be equivalent to
  // normalization = BATCH_SIZE to be consistent with previous behavior.
  optional bool normalize = 2;
  optional bool is_num_scale = 4 [default = true];
}
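
// Worked example of the normalization modes above: for a loss summed over a
// batch of 8 predictions with 10x10 spatial output where 50 locations carry
// ignore_label, the divisor is 8*10*10 = 800 under FULL, 800 - 50 = 750
// under VALID, 8 under BATCH_SIZE, and 1 under NONE.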

// Messages that store parameters used by individual layer types follow, in
// alphabetical order.

message AccuracyParameter {
  // When computing accuracy, count as correct by comparing the true label to
  // the top k scoring classes.  By default, only compare to the top scoring
  // class (i.e. argmax).
  optional uint32 top_k = 1 [default = 1];

  // The "label" axis of the prediction blob, whose argmax corresponds to the
  // predicted label -- may be negative to index from the end (e.g., -1 for the
  // last axis).  For example, if axis == 1 and the predictions are
  // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
  // labels with integer values in {0, 1, ..., C-1}.
  optional int32 axis = 2 [default = 1];

  // If specified, ignore instances with the given label.
  optional int32 ignore_label = 3;
}

message AnnotatedDataParameter {
  // Define the sampler.
  repeated BatchSampler batch_sampler = 1;
  // Store label name and label id in LabelMap format.
  optional string label_map_file = 2;
  // If provided, it will replace the AnnotationType stored in each
  // AnnotatedDatum.
  optional AnnotatedDatum.AnnotationType anno_type = 3;
}

message ArgMaxParameter {
  // If true, produce pairs (argmax, maxval)
  optional bool out_max_val = 1 [default = false];
  optional uint32 top_k = 2 [default = 1];
  // The axis along which to maximize -- may be negative to index from the
  // end (e.g., -1 for the last axis).
  // By default, ArgMaxLayer maximizes over the flattened trailing dimensions
  // for each index of the first / num dimension.
  optional int32 axis = 3;
  optional bool softmax_threshold = 4 [default = false];
}

message ConcatParameter {
  // The axis along which to concatenate -- may be negative to index from the
  // end (e.g., -1 for the last axis).  Other axes must have the
  // same dimension for all the bottom blobs.
  // By default, ConcatLayer concatenates blobs along the "channels" axis (1).
  optional int32 axis = 2 [default = 1];

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  optional uint32 concat_dim = 1 [default = 1];
}

message BatchNormParameter {
  // If false, normalization is performed over the current mini-batch
  // and global statistics are accumulated (but not yet used) by a moving
  // average.
  // If true, those accumulated mean and variance values are used for the
  // normalization.
  // By default, it is set to false when the network is in the training
  // phase and true when the network is in the testing phase.
  optional bool use_global_stats = 1;
  // What fraction of the moving average remains each iteration?
  // Smaller values make the moving average decay faster, giving more
  // weight to the recent values.
  // Each iteration updates the moving average @f$S_{t-1}@f$ with the
  // current mean @f$ Y_t @f$ by
  // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$
  // is the moving_average_fraction parameter.
  optional float moving_average_fraction = 2 [default = .999];
  // Small value to add to the variance estimate so that we don't divide by
  // zero.
  optional float eps = 3 [default = 1e-5];
  optional bool use_weight_bias = 5 [default = true];
  optional bool bias_term = 6 [default = true]; // whether to have bias terms
  optional FillerParameter filler = 7; // The filler for the weight
  optional FillerParameter bias_filler = 8; // The filler for the bias
  optional FillerParameter scale_filler = 9; // The filler for the scale
}

message BiasParameter {
  // The first axis of bottom[0] (the first input Blob) along which to apply
  // bottom[1] (the second input Blob).  May be negative to index from the end
  // (e.g., -1 for the last axis).
  //
  // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
  // top[0] will have the same shape, and bottom[1] may have any of the
  // following shapes (for the given value of axis):
  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
  //    (axis == 1 == -3)          3;     3x40;     3x40x60
  //    (axis == 2 == -2)                   40;       40x60
  //    (axis == 3 == -1)                                60
  // Furthermore, bottom[1] may have the empty shape (regardless of the value of
  // "axis") -- a scalar bias.
  optional int32 axis = 1 [default = 1];

  // (num_axes is ignored unless just one bottom is given and the bias is
  // a learned parameter of the layer.  Otherwise, num_axes is determined by the
  // number of axes by the second bottom.)
  // The number of axes of the input (bottom[0]) covered by the bias
  // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
  // Set num_axes := 0, to add a zero-axis Blob: a scalar.
  optional int32 num_axes = 2 [default = 1];

  // (filler is ignored unless just one bottom is given and the bias is
  // a learned parameter of the layer.)
  // The initialization for the learned bias parameter.
  // Default is the zero (0) initialization, resulting in the BiasLayer
  // initially performing the identity operation.
  optional FillerParameter filler = 3;
}

message ContrastiveLossParameter {
  // margin for dissimilar pair
  optional float margin = 1 [default = 1.0];
  // The first implementation of this cost did not exactly match the cost of
  // Hadsell et al. 2006 -- using (margin - d^2) instead of (margin - d)^2.
  // legacy_version = false (the default) uses (margin - d)^2 as proposed in the
  // Hadsell paper. New models should probably use this version.
  // legacy_version = true uses (margin - d^2). This is kept to support /
  // reproduce existing models and results.
  optional bool legacy_version = 2 [default = false];
}

message ConvolutionParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms

  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in all spatial dimensions, or once per spatial dimension.
  repeated uint32 pad = 3; // The padding size; defaults to 0
  repeated uint32 kernel_size = 4; // The kernel size
  repeated uint32 stride = 6; // The stride; defaults to 1
  // Factor used to dilate the kernel, (implicitly) zero-filling the resulting
  // holes. (Kernel dilation is sometimes referred to by its use in the
  // algorithme à trous from Holschneider et al. 1987.)
  repeated uint32 dilation = 18; // The dilation; defaults to 1

  // For 2D convolution only, the *_h and *_w versions may also be used to
  // specify both spatial dimensions.
  optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
  optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
  optional uint32 kernel_h = 11; // The kernel height (2D only)
  optional uint32 kernel_w = 12; // The kernel width (2D only)
  optional uint32 stride_h = 13; // The stride height (2D only)
  optional uint32 stride_w = 14; // The stride width (2D only)

  optional uint32 group = 5 [default = 1]; // The group size for group conv

  optional FillerParameter weight_filler = 7; // The filler for the weight
  optional FillerParameter bias_filler = 8; // The filler for the bias
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
    CUDNN_FORWARD = 3;
  }
  optional Engine engine = 15 [default = DEFAULT];

  // The axis to interpret as "channels" when performing convolution.
  // Preceding dimensions are treated as independent inputs;
  // succeeding dimensions are treated as "spatial".
  // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
  // N independent 2D convolutions, sliding C-channel (or (C/g)-channel, for
  // groups g>1) filters across the spatial axes (H, W) of the input.
  // With (N, C, D, H, W) inputs, and axis == 1, we perform
  // N independent 3D convolutions, sliding (C/g)-channel
  // filters across the spatial axes (D, H, W) of the input.
  optional int32 axis = 16 [default = 1];

  // Whether to force use of the general ND convolution, even if a specific
  // implementation for blobs of the appropriate number of spatial dimensions
  // is available. (Currently, there is only a 2D-specific convolution
  // implementation; for input blobs with num_axes != 2, this option is
  // ignored and the ND implementation will be used.)
  optional bool force_nd_im2col = 17 [default = false];
  optional uint32 paramq = 19;
  optional float params = 20 [default = 0];
  optional float scale_in = 21;
  optional float scale_out = 22;
  optional int32 a_min = 23 [default = -128];
  optional int32 a_max = 24 [default = 127];
}
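
// Worked example of the spatial output size computed by convolution layers:
//   out = floor((in + 2*pad - dilation*(kernel_size - 1) - 1) / stride) + 1
// e.g. a 224-wide input with kernel_size 3, pad 1, stride 2, dilation 1
// gives floor((224 + 2 - 2 - 1) / 2) + 1 = 112.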

// https://github.com/facebook/C3D/blob/master/C3D-v1.1/src/caffe/proto/caffe.proto
message Convolution3DParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms
  optional uint32 pad = 3 [default = 0]; // The padding size
  optional uint32 kernel_size = 4; // The kernel size
  optional uint32 group = 5 [default = 1]; // The group size for group conv
  optional uint32 kernel_depth = 6; // The kernel depth
  optional uint32 stride = 7 [default = 1]; // The stride
  optional uint32 temporal_stride = 8 [default = 1]; // The temporal stride
  optional FillerParameter weight_filler = 9; // The filler for the weight
  optional FillerParameter bias_filler = 10; // The filler for the bias
  optional uint32 filter_group = 11 [default = 1]; // divide filters into groups to reduce memory consumption
  optional uint32 temporal_pad = 12 [default = 0]; // The temporal padding size
}

message CropParameter {
  // To crop, elements of the first bottom are selected to fit the dimensions
  // of the second, reference bottom. The crop is configured by
  // - the crop `axis` to pick the dimensions for cropping
  // - the crop `offset` to set the shift for all/each dimension
  // to align the cropped bottom with the reference bottom.
  // All dimensions up to but excluding `axis` are preserved, while
  // the dimensions including and trailing `axis` are cropped.
  // If only one `offset` is set, then all dimensions are offset by this amount.
  // Otherwise, the number of offsets must equal the number of cropped axes to
  // shift the crop in each dimension accordingly.
  // Note: standard dimensions are N,C,H,W so the default is a spatial crop,
  // and `axis` may be negative to index from the end (e.g., -1 for the last
  // axis).
  optional int32 axis = 1 [default = 2];
  repeated uint32 offset = 2;
}
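
// Worked example: with bottom[0] of shape 1x3x224x224, a reference bottom[1]
// of shape 1x3x200x200, the default axis = 2 and a single "offset: 12", the
// top blob is 1x3x200x200, taken from rows and columns 12..211 of bottom[0].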

message DataParameter {
  enum DB {
    LEVELDB = 0;
    LMDB = 1;
  }
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // so that asynchronous SGD clients do not all start at the same point. The
  // skip point is set as rand_skip * rand(0,1). Note that rand_skip should
  // not be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  optional uint32 rand_skip = 7 [default = 0];
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (increase if data feeding bandwidth varies, within the
  // limit of device memory for GPU training)
  optional uint32 prefetch = 10 [default = 4];
  optional uint32 label_size = 11 [default = 0];
  optional uint32 data_width = 12 [default = 0];
  optional bool output_width = 13 [default = false];
}

// Message that stores parameters used by DetectionEvaluateLayer
message DetectionEvaluateParameter {
  // Number of classes that are actually predicted. Required!
  optional uint32 num_classes = 1;
  // Label id for the background class. Needed for the sanity check that the
  // background class is neither in the ground truth nor the detections.
  optional uint32 background_label_id = 2 [default = 0];
  // Threshold for deciding true/false positive.
  optional float overlap_threshold = 3 [default = 0.5];
  // If true, also consider difficult ground truth for evaluation.
  optional bool evaluate_difficult_gt = 4 [default = true];
  // A file which contains a list of names and sizes in the same order as
  // the input DB. The file is in the following format:
  //    name height width
  //    ...
  // If provided, we will scale the prediction and ground truth NormalizedBBox
  // for evaluation.
  optional string name_size_file = 5;
  // The resize parameter used in converting NormalizedBBox to the original image.
  optional ResizeParameter resize_param = 6;
}

message NonMaximumSuppressionParameter {
  // Threshold to be used in nms.
  optional float nms_threshold = 1 [default = 0.3];
  // Maximum number of results to be kept.
  optional int32 top_k = 2;
  // Parameter for adaptive nms.
  optional float eta = 3 [default = 1.0];
}

message SaveOutputParameter {
  // Output directory. If not empty, we will save the results.
  optional string output_directory = 1;
  // Output name prefix.
  optional string output_name_prefix = 2;
  // Output format.
  //    VOC - PASCAL VOC output format.
  //    COCO - MS COCO output format.
  optional string output_format = 3;
  // If you want to output results, you must also provide the following two
  // files; otherwise, saving results is skipped.
  // label map file.
  optional string label_map_file = 4;
  // A file which contains a list of names and sizes in the same order as
  // the input DB. The file is in the following format:
  //    name height width
  //    ...
  optional string name_size_file = 5;
  // Number of test images. It can be less than the number of lines specified
  // in name_size_file, for example, when we only want to evaluate on part of
  // the test images.
  optional uint32 num_test_image = 6;
  // The resize parameter used in saving the data.
  optional ResizeParameter resize_param = 7;
}

// Message that stores parameters used by DetectionOutputLayer
message DetectionOutputParameter {
  // Number of classes to be predicted. Required!
  optional uint32 num_classes = 1;
  // If true, bounding boxes are shared among different classes.
  optional bool share_location = 2 [default = true];
  // Background label id. If there is no background class,
  // set it as -1.
  optional int32 background_label_id = 3 [default = 0];
  // Parameters used for non maximum suppression.
  optional NonMaximumSuppressionParameter nms_param = 4;
  // Parameters used for saving detection results.
  optional SaveOutputParameter save_output_param = 5;
  // Type of coding method for bbox.
  optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER];
  // If true, variance is encoded in target; otherwise we need to adjust the
  // predicted offset accordingly.
  optional bool variance_encoded_in_target = 8 [default = false];
  // Number of total bboxes to be kept per image after the nms step.
  // -1 means keeping all bboxes after the nms step.
  optional int32 keep_top_k = 7 [default = -1];
  // Only consider detections whose confidences are larger than a threshold.
  // If not provided, consider all boxes.
  optional float confidence_threshold = 9;
  // If true, visualize the detection results.
  optional bool visualize = 10 [default = false];
  // The threshold used to visualize the detection results.
  optional float visualize_threshold = 11;
  // If provided, save outputs to a video file.
  optional string save_file = 12;
  optional float objectness_score = 24 [default = 0.01];
}
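
// Illustrative detection_output_param sketch for an SSD-style model with 20
// object classes plus background (hypothetical values):
//   detection_output_param {
//     num_classes: 21
//     share_location: true
//     background_label_id: 0
//     nms_param { nms_threshold: 0.45  top_k: 400 }
//     keep_top_k: 200
//     confidence_threshold: 0.01
//   }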

message DropoutParameter {
  optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
}

// DummyDataLayer fills any number of arbitrarily shaped blobs with random
// (or constant) data generated by "Fillers" (see "message FillerParameter").
message DummyDataParameter {
  // This layer produces N >= 1 top blobs.  DummyDataParameter must specify 1 or N
  // shape fields, and 0, 1 or N data_fillers.
  //
  // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used.
  // If 1 data_filler is specified, it is applied to all top blobs.  If N are
  // specified, the ith is applied to the ith top blob.
  repeated FillerParameter data_filler = 1;
  repeated BlobShape shape = 6;

  // 4D dimensions -- deprecated.  Use "shape" instead.
  repeated uint32 num = 2;
  repeated uint32 channels = 3;
  repeated uint32 height = 4;
  repeated uint32 width = 5;
}

message EltwiseParameter {
  enum EltwiseOp {
    PROD = 0;
    SUM = 1;
    MAX = 2;
  }
  optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation
  repeated float coeff = 2; // blob-wise coefficient for SUM operation

  // Whether to use an asymptotically slower (for >2 inputs) but stabler method
  // of computing the gradient for the PROD operation. (No effect for SUM op.)
  optional bool stable_prod_grad = 3 [default = true];
}

// Message that stores parameters used by ELULayer
message ELUParameter {
  // Described in:
  // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate
  // Deep Network Learning by Exponential Linear Units (ELUs). arXiv
  optional float alpha = 1 [default = 1];
}

// Message that stores parameters used by EmbedLayer
message EmbedParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  // The input is given as integers to be interpreted as one-hot
  // vector indices with dimension num_input.  Hence num_input should be
  // 1 greater than the maximum possible input value.
  optional uint32 input_dim = 2;

  optional bool bias_term = 3 [default = true]; // Whether to use a bias term
  optional FillerParameter weight_filler = 4; // The filler for the weight
  optional FillerParameter bias_filler = 5; // The filler for the bias
}
1246
1247// Message that stores parameters used by ExpLayer
1248message ExpParameter {
1249  // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0.
1250  // Or if base is set to the default (-1), base is set to e,
1251  // so y = exp(shift + scale * x).
1252  optional float base = 1 [default = -1.0];
1253  optional float scale = 2 [default = 1.0];
1254  optional float shift = 3 [default = 0.0];
1255}
1256
1257/// Message that stores parameters used by FlattenLayer
1258message FlattenParameter {
1259  // The first axis to flatten: all preceding axes are retained in the output.
1260  // May be negative to index from the end (e.g., -1 for the last axis).
1261  optional int32 axis = 1 [default = 1];
1262
1263  // The last axis to flatten: all following axes are retained in the output.
1264  // May be negative to index from the end (e.g., the default -1 for the last
1265  // axis).
1266  optional int32 end_axis = 2 [default = -1];
1267}

// Message that stores parameters used by HDF5DataLayer
message HDF5DataParameter {
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 2;

  // Specify whether to shuffle the data.
  // If shuffle == true, the ordering of the HDF5 files is shuffled,
  // and the ordering of data within any given HDF5 file is shuffled,
  // but data between different files are not interleaved; all of a file's
  // data are output (in a random order) before moving onto another file.
  optional bool shuffle = 3 [default = false];
}

message HDF5OutputParameter {
  optional string file_name = 1;
}

message HingeLossParameter {
  enum Norm {
    L1 = 1;
    L2 = 2;
  }
  // Specify the Norm to use: L1 or L2.
  optional Norm norm = 1 [default = L1];
}

message ImageDataParameter {
  // Specify the data source.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4 [default = 1];
  // The rand_skip variable is for the data layer to skip a few data points
  // to prevent all asynchronous SGD clients from starting at the same point.
  // The skip point is set as rand_skip * rand(0,1). Note that rand_skip
  // should not be larger than the number of keys in the database.
  optional uint32 rand_skip = 7 [default = 0];
  // Whether or not ImageLayer should shuffle the list of files at every epoch.
  optional bool shuffle = 8 [default = false];
  // It will also resize images if new_height or new_width are not zero.
  optional uint32 new_height = 9 [default = 0];
  optional uint32 new_width = 10 [default = 0];
  // Specify if the images are color or grayscale.
  optional bool is_color = 11 [default = true];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can
  // do simple scaling and subtracting the data mean, if provided. Note that
  // the mean subtraction is always carried out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to
  // randomly crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly
  // mirror data.
  optional bool mirror = 6 [default = false];
  optional string root_folder = 12 [default = ""];
  optional uint32 label_num = 13 [default = 1];
  optional uint32 att_num = 14 [default = 1];
}
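
// A minimal sketch of an image data layer (paths and names hypothetical);
// the source file typically lists one "<image path> <label>" pair per line:
//
//   layer {
//     name: "data" type: "ImageData" top: "data" top: "label"
//     image_data_param {
//       source: "train_list.txt"
//       batch_size: 32 shuffle: true
//       new_height: 256 new_width: 256
//       root_folder: "/path/to/images/"
//     }
//   }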

message InfogainLossParameter {
  // Specify the infogain matrix source.
  optional string source = 1;
  optional int32 axis = 2 [default = 1]; // axis of prob
}

message InnerProductParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms
  optional FillerParameter weight_filler = 3; // The filler for the weight
  optional FillerParameter bias_filler = 4; // The filler for the bias

  // The first axis to be lumped into a single inner product computation;
  // all preceding axes are retained in the output.
  // May be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 5 [default = 1];
  // Specify whether to transpose the weight matrix or not.
  // If transpose == true, any operations will be performed on the transpose
  // of the weight matrix. The weight matrix itself is not transposed;
  // rather, the transpose flag of the operations is toggled accordingly.
  optional bool transpose = 6 [default = false];
  // The fields below are fork-specific additions, presumably for fixed-point
  // (int8) quantization given the [-128, 127] defaults; their exact semantics
  // depend on this fork's implementation.
  optional uint32 paramq = 7 [default = 0];
  optional float params = 8 [default = 0];
  optional float scale_in = 9;
  optional float scale_out = 10;
  optional int32 a_min = 11 [default = -128];
  optional int32 a_max = 12 [default = 127];
}
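
// Sketch (names hypothetical): with axis: 1 (the default), a bottom of shape
// N x C x H x W is treated as N samples of size C*H*W, producing a top of
// shape N x num_output:
//
//   layer {
//     name: "fc" type: "InnerProduct" bottom: "pool" top: "fc"
//     inner_product_param {
//       num_output: 1000
//       weight_filler { type: "xavier" }
//       bias_filler { type: "constant" value: 0 }
//     }
//   }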

message InputParameter {
  // This layer produces N >= 1 top blob(s) to be assigned manually.
  // Define N shapes to set a shape for each top.
  // Define 1 shape to set the same shape for every top.
  // Define no shape to defer to reshaping manually.
  repeated BlobShape shape = 1;
}
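
// For example (names hypothetical), a deploy-style input layer declaring a
// single 4D top might be written as:
//
//   layer {
//     name: "input" type: "Input" top: "data"
//     input_param { shape { dim: 1 dim: 3 dim: 224 dim: 224 } }
//   }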

// Message that stores parameters used by LogLayer
message LogParameter {
  // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0.
  // Or if base is set to the default (-1), base is set to e,
  // so y = ln(shift + scale * x) = log_e(shift + scale * x).
  optional float base = 1 [default = -1.0];
  optional float scale = 2 [default = 1.0];
  optional float shift = 3 [default = 0.0];
}
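
// For example (names hypothetical), y = log10(x) can be obtained with:
//
//   layer {
//     name: "log" type: "Log" bottom: "in" top: "out"
//     log_param { base: 10 }
//   }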

// Message that stores parameters used by LRNLayer
message LRNParameter {
  optional uint32 local_size = 1 [default = 5];
  optional float alpha = 2 [default = 1.];
  optional float beta = 3 [default = 0.75];
  enum NormRegion {
    ACROSS_CHANNELS = 0;
    WITHIN_CHANNEL = 1;
  }
  optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
  optional float k = 5 [default = 1.];
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 6 [default = DEFAULT];
}

message MemoryDataParameter {
  optional uint32 batch_size = 1;
  optional uint32 channels = 2;
  optional uint32 height = 3;
  optional uint32 width = 4;
}

// Message that stores parameters used by MultiBoxLossLayer
message MultiBoxLossParameter {
  // Localization loss type.
  enum LocLossType {
    L2 = 0;
    SMOOTH_L1 = 1;
  }
  optional LocLossType loc_loss_type = 1 [default = SMOOTH_L1];
  // Confidence loss type.
  enum ConfLossType {
    SOFTMAX = 0;
    LOGISTIC = 1;
  }
  optional ConfLossType conf_loss_type = 2 [default = SOFTMAX];
  // Weight for localization loss.
  optional float loc_weight = 3 [default = 1.0];
  // Number of classes to be predicted. Required!
  optional uint32 num_classes = 4;
  // If true, bounding boxes are shared among different classes.
  optional bool share_location = 5 [default = true];
  // Matching method during training.
  enum MatchType {
    BIPARTITE = 0;
    PER_PREDICTION = 1;
  }
  optional MatchType match_type = 6 [default = PER_PREDICTION];
  // If match_type is PER_PREDICTION, use overlap_threshold to
  // determine the extra matching bboxes.
  optional float overlap_threshold = 7 [default = 0.5];
  // Use prior for matching.
  optional bool use_prior_for_matching = 8 [default = true];
  // Background label id.
  optional uint32 background_label_id = 9 [default = 0];
  // If true, also consider difficult ground truth.
  optional bool use_difficult_gt = 10 [default = true];
  // If true, perform negative mining.
  // DEPRECATED: use mining_type instead.
  optional bool do_neg_mining = 11;
  // The negative/positive ratio.
  optional float neg_pos_ratio = 12 [default = 3.0];
  // The negative overlap upper bound for the unmatched predictions.
  optional float neg_overlap = 13 [default = 0.5];
  // Type of coding method for bbox.
  optional PriorBoxParameter.CodeType code_type = 14 [default = CORNER];
  // If true, encode the variance of the prior box in the loc loss target
  // instead of in the bbox.
  optional bool encode_variance_in_target = 16 [default = false];
  // If true, map all object classes to an agnostic class. This is useful for
  // learning an objectness detector.
  optional bool map_object_to_agnostic = 17 [default = false];
  // If true, ignore cross-boundary bboxes during matching.
  // A cross-boundary bbox is one that lies outside the image region.
  optional bool ignore_cross_boundary_bbox = 18 [default = false];
  // If true, only backpropagate on corners which are inside the image
  // region when code_type is CORNER or CORNER_SIZE.
  optional bool bp_inside = 19 [default = false];
  // Mining type during training.
  //   NONE : use all negatives.
  //   MAX_NEGATIVE : select negatives based on the score.
  //   HARD_EXAMPLE : select hard examples based on "Training Region-based
  //   Object Detectors with Online Hard Example Mining", Shrivastava et al.
  enum MiningType {
    NONE = 0;
    MAX_NEGATIVE = 1;
    HARD_EXAMPLE = 2;
  }
  optional MiningType mining_type = 20 [default = MAX_NEGATIVE];
  // Parameters used for non-maximum suppression during hard example mining.
  optional NonMaximumSuppressionParameter nms_param = 21;
  optional int32 sample_size = 22 [default = 64];
  optional bool use_prior_for_nms = 23 [default = false];
}
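
// A partial sketch of an SSD-style configuration; the values are illustrative
// only, not recommendations from this file:
//
//   multibox_loss_param {
//     loc_loss_type: SMOOTH_L1 conf_loss_type: SOFTMAX
//     num_classes: 21 share_location: true
//     match_type: PER_PREDICTION overlap_threshold: 0.5
//     mining_type: MAX_NEGATIVE neg_pos_ratio: 3.0 neg_overlap: 0.5
//   }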

message MVNParameter {
  // This parameter can be set to false to normalize the mean only.
  optional bool normalize_variance = 1 [default = true];

  // This parameter can be set to true to perform DNN-like MVN.
  optional bool across_channels = 2 [default = false];

  // Epsilon to avoid dividing by zero while normalizing variance.
  optional float eps = 3 [default = 1e-9];
}

// Message that stores parameters used by NormalizeLayer
message NormalizeParameter {
  optional bool across_spatial = 1 [default = true];
  // Initial value of scale. Default is 1.0 for all.
  optional FillerParameter scale_filler = 2;
  // Whether or not scale parameters are shared across channels.
  optional bool channel_shared = 3 [default = true];
  // Epsilon to avoid dividing by zero while normalizing variance.
  optional float eps = 4 [default = 1e-10];
}

message ParameterParameter {
  optional BlobShape shape = 1;
}

message PermuteParameter {
  // The new order of the axes of the data. Note that it should be within
  // the same range as the input data's axes, and it starts from 0.
  // Do not provide a repeated order.
  repeated uint32 order = 1;
}
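
// For example (a sketch, names hypothetical), swapping the channel and height
// axes of an N x C x H x W blob to get N x H x C x W:
//
//   layer {
//     name: "perm" type: "Permute" bottom: "in" top: "out"
//     permute_param { order: 0 order: 2 order: 1 order: 3 }
//   }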

message PoolingParameter {
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional bool avg_include_pad = 13 [default = true];
  optional PoolMethod pool = 1 [default = MAX]; // The pooling method
  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in height and width or as Y, X pairs.
  optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
  optional uint32 pad_h = 9 [default = 0]; // The padding height
  optional uint32 pad_w = 10 [default = 0]; // The padding width
  optional uint32 kernel_size = 2; // The kernel size (square)
  optional uint32 kernel_h = 5; // The kernel height
  optional uint32 kernel_w = 6; // The kernel width
  optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
  optional uint32 stride_h = 7; // The stride height
  optional uint32 stride_w = 8; // The stride width
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 11 [default = DEFAULT];
  // If global_pooling is true, the layer pools over the full extent of the
  // bottom by setting kernel_h = bottom->height and kernel_w = bottom->width.
  optional bool global_pooling = 12 [default = false];
  enum RoundMode {
    CEIL = 0;
    FLOOR = 1;
  }
  optional RoundMode round_mode = 14 [default = CEIL];
}

message Pooling3DParameter {
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 1 [default = MAX]; // The pooling method
  optional uint32 kernel_size = 2; // The kernel size
  optional uint32 kernel_depth = 3; // The kernel depth
  optional uint32 stride = 4 [default = 1]; // The stride
  optional uint32 temporal_stride = 5 [default = 1]; // The temporal stride
  optional uint32 pad = 6 [default = 0];
  optional uint32 temporal_pad = 7 [default = 0];
}

message PowerParameter {
  // PowerLayer computes outputs y = (shift + scale * x) ^ power.
  optional float power = 1 [default = 1.0];
  optional float scale = 2 [default = 1.0];
  optional float shift = 3 [default = 0.0];
}
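
// For example (names hypothetical), y = (2x + 1)^2 corresponds to:
//
//   layer {
//     name: "pow" type: "Power" bottom: "in" top: "out"
//     power_param { power: 2 scale: 2 shift: 1 }
//   }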

// Message that stores parameters used by PriorBoxLayer
message PriorBoxParameter {
  // Encode/decode type.
  enum CodeType {
    CORNER = 1;
    CENTER_SIZE = 2;
    CORNER_SIZE = 3;
  }
  // Minimum box size (in pixels). Required!
  repeated float min_size = 1;
  // Maximum box size (in pixels). Required!
  repeated float max_size = 2;
  // Various aspect ratios. Duplicate ratios are ignored.
  // If none is provided, the default ratio 1 is used.
  repeated float aspect_ratio = 3;
  // If true, will flip each aspect ratio.
  // For example, if there is aspect ratio "r",
  // we will generate aspect ratio "1.0/r" as well.
  optional bool flip = 4 [default = true];
  // If true, will clip the prior so that it is within [0, 1].
  optional bool clip = 5 [default = false];
  // Variance for adjusting the prior bboxes.
  repeated float variance = 6;
  // By default, we calculate img_height, img_width, step_x, step_y based on
  // bottom[0] (feat) and bottom[1] (img), unless these values are explicitly
  // provided.
  // Explicitly provide the img_size.
  optional uint32 img_size = 7;
  // Either img_size or img_h/img_w should be specified; not both.
  optional uint32 img_h = 8;
  optional uint32 img_w = 9;

  // Explicitly provide the step size.
  optional float step = 10;
  // Either step or step_h/step_w should be specified; not both.
  optional float step_h = 11;
  optional float step_w = 12;

  // Offset to the top left corner of each cell.
  optional float offset = 13 [default = 0.5];
}
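
// A sketch of a typical SSD-style prior box layer (values and names are
// illustrative). With aspect_ratio: 2 and flip: true, the generated ratios
// are 1, 2, and 1/2:
//
//   layer {
//     name: "priorbox" type: "PriorBox"
//     bottom: "conv_feat" bottom: "data" top: "priors"
//     prior_box_param {
//       min_size: 30 max_size: 60
//       aspect_ratio: 2 flip: true clip: false
//       variance: 0.1 variance: 0.1 variance: 0.2 variance: 0.2
//     }
//   }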

message PythonParameter {
  optional string module = 1;
  optional string layer = 2;
  // This value is set to the attribute `param_str` of the `PythonLayer` object
  // in Python before calling the `setup()` method. This could be a number,
  // string, dictionary in Python dict format, JSON, etc. You may parse this
  // string in the `setup` method and use it in `forward` and `backward`.
  optional string param_str = 3 [default = ''];
  // Whether this PythonLayer is shared among worker solvers during data
  // parallelism. If true, each worker solver sequentially runs forward from
  // this layer. This value should be set to true if you are using it as a
  // data layer.
  optional bool share_in_parallel = 4 [default = false];
}
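
// Sketch of a hypothetical Python layer; `my_module.MyLayer` is assumed to
// exist on the PYTHONPATH and to parse param_str itself in setup():
//
//   layer {
//     name: "py" type: "Python" bottom: "in" top: "out"
//     python_param {
//       module: "my_module" layer: "MyLayer"
//       param_str: "{'threshold': 0.5}"
//     }
//   }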

// Message that stores parameters used by RecurrentLayer
message RecurrentParameter {
  // The dimension of the output (and usually hidden state) representation --
  // must be explicitly set to non-zero.
  optional uint32 num_output = 1 [default = 0];

  optional FillerParameter weight_filler = 2; // The filler for the weight
  optional FillerParameter bias_filler = 3; // The filler for the bias

  // Whether to enable displaying debug_info in the unrolled recurrent net.
  optional bool debug_info = 4 [default = false];

  // Whether to add as additional inputs (bottoms) the initial hidden state
  // blobs, and add as additional outputs (tops) the final timestep hidden state
  // blobs.  The number of additional bottom/top blobs required depends on the
  // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
  optional bool expose_hidden = 5 [default = false];
}

// Message that stores parameters used by ReductionLayer
message ReductionParameter {
  enum ReductionOp {
    SUM = 1;
    ASUM = 2;
    SUMSQ = 3;
    MEAN = 4;
  }

  optional ReductionOp operation = 1 [default = SUM]; // reduction operation

  // The first axis to reduce to a scalar -- may be negative to index from the
  // end (e.g., -1 for the last axis).
  // (Currently, only reduction along ALL "tail" axes is supported; reduction
  // of axis M through N, where N < num_axes - 1, is unsupported.)
  // Suppose we have an n-axis bottom Blob with shape:
  //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
  // If axis == m, the output Blob will have shape
  //     (d0, d1, d2, ..., d(m-1)),
  // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
  // times, each reducing (dm * d(m+1) * ... * d(n-1)) individual elements.
  // If axis == 0 (the default), the output Blob always has the empty shape
  // (count 1), performing reduction across the entire input --
  // often useful for creating new loss functions.
  optional int32 axis = 2 [default = 0];

  optional float coeff = 3 [default = 1.0]; // coefficient for output
}
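
// For example (a sketch, names hypothetical), summing each sample of an
// N x D bottom into an N-element top uses axis: 1:
//
//   layer {
//     name: "rowsum" type: "Reduction" bottom: "in" top: "out"
//     reduction_param { operation: SUM axis: 1 }
//   }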

// Message that stores parameters used by ReLULayer
message ReLUParameter {
  // Allow non-zero slope for negative inputs to speed up optimization.
  // Described in:
  // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
  // improve neural network acoustic models. In ICML Workshop on Deep Learning
  // for Audio, Speech, and Language Processing.
  optional float negative_slope = 1 [default = 0];
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 2 [default = DEFAULT];
}

message ReshapeParameter {
  // Specify the output dimensions. If some of the dimensions are set to 0,
  // the corresponding dimension from the bottom layer is used (unchanged).
  // Exactly one dimension may be set to -1, in which case its value is
  // inferred from the count of the bottom blob and the remaining dimensions.
  // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
  //
  //   layer {
  //     type: "Reshape" bottom: "input" top: "output"
  //     reshape_param { ... }
  //   }
  //
  // If "input" is 2D with shape 2 x 8, then the following reshape_param
  // specifications are all equivalent, producing a 3D blob "output" with shape
  // 2 x 2 x 4:
  //
  //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
  //   reshape_param { shape { dim:  0  dim: -1  dim:  4 } }
  //
  optional BlobShape shape = 1;

  // axis and num_axes control the portion of the bottom blob's shape that is
  // replaced by (included in) the reshape. By default (axis == 0 and
  // num_axes == -1), the entire bottom blob shape is included in the reshape,
  // and hence the shape field must specify the entire output shape.
  //
  // axis may be non-zero to retain some portion of the beginning of the input
  // shape (and may be negative to index from the end; e.g., -1 to begin the
  // reshape after the last axis, including nothing in the reshape,
  // -2 to include only the last axis, etc.).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are all equivalent,
  // producing a blob "output" with shape 2 x 2 x 4:
  //
  //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
  //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
  //   reshape_param { shape { dim: 2  dim: 4 } axis: -3 }
  //
  // num_axes specifies the extent of the reshape.
  // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
  // input axes in the range [axis, axis+num_axes].
  // num_axes may also be -1, the default, to include all remaining axes
  // (starting from axis).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are equivalent,
  // producing a blob "output" with shape 1 x 2 x 8.
  //
  //   reshape_param { shape { dim:  1  dim: 2  dim:  8 } }
  //   reshape_param { shape { dim:  1  dim: 2  }  num_axes: 1 }
  //   reshape_param { shape { dim:  1  }  num_axes: 0 }
  //
  // On the other hand, these would produce output blob shape 2 x 1 x 8:
  //
  //   reshape_param { shape { dim: 2  dim: 1  dim: 8  }  }
  //   reshape_param { shape { dim: 1 }  axis: 1  num_axes: 0 }
  //
  optional int32 axis = 2 [default = 0];
  optional int32 num_axes = 3 [default = -1];
}

message ScaleParameter {
  // The first axis of bottom[0] (the first input Blob) along which to apply
  // bottom[1] (the second input Blob).  May be negative to index from the end
  // (e.g., -1 for the last axis).
  //
  // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
  // top[0] will have the same shape, and bottom[1] may have any of the
  // following shapes (for the given value of axis):
  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
  //    (axis == 1 == -3)          3;     3x40;     3x40x60
  //    (axis == 2 == -2)                   40;       40x60
  //    (axis == 3 == -1)                                60
  // Furthermore, bottom[1] may have the empty shape (regardless of the value of
  // "axis") -- a scalar multiplier.
  optional int32 axis = 1 [default = 1];

  // (num_axes is ignored unless just one bottom is given and the scale is
  // a learned parameter of the layer.  Otherwise, num_axes is determined by the
  // number of axes of the second bottom.)
  // The number of axes of the input (bottom[0]) covered by the scale
  // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
  // Set num_axes := 0 to multiply with a zero-axis Blob: a scalar.
  optional int32 num_axes = 2 [default = 1];

  // (filler is ignored unless just one bottom is given and the scale is
  // a learned parameter of the layer.)
  // The initialization for the learned scale parameter.
  // Default is the unit (1) initialization, resulting in the ScaleLayer
  // initially performing the identity operation.
  optional FillerParameter filler = 3;

  // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
  // may be more efficient).  Initialized with bias_filler (defaults to 0).
  optional bool bias_term = 4 [default = false];
  optional FillerParameter bias_filler = 5;
}
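
// Sketch of a common "learned per-channel scale + bias" use, e.g. after batch
// normalization (names hypothetical). With the default axis: 1 and
// num_axes: 1, this learns one scale and one bias per channel:
//
//   layer {
//     name: "scale" type: "Scale" bottom: "bn" top: "bn"
//     scale_param { bias_term: true }
//   }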

message SigmoidParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];
}

message SliceParameter {
  // The axis along which to slice -- may be negative to index from the end
  // (e.g., -1 for the last axis).
  // By default, SliceLayer slices blobs along the "channels" axis (1).
  optional int32 axis = 3 [default = 1];
  repeated uint32 slice_point = 2;

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  optional uint32 slice_dim = 1 [default = 1];
}
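
// For example (a sketch, names hypothetical), splitting an N x 8 x H x W blob
// into channel groups of sizes 2 and 6:
//
//   layer {
//     name: "slice" type: "Slice" bottom: "in" top: "part1" top: "part2"
//     slice_param { axis: 1 slice_point: 2 }
//   }
//
// With K slice points, K+1 tops must be given; omitting slice_point divides
// the axis evenly among the tops.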

// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
message SoftmaxParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];

  // The axis along which to perform the softmax -- may be negative to index
  // from the end (e.g., -1 for the last axis).
  // Any other axes will be evaluated as independent softmaxes.
  optional int32 axis = 2 [default = 1];
}

message TanHParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];
}

// Message that stores parameters used by TileLayer
message TileParameter {
  // The index of the axis to tile.
  optional int32 axis = 1 [default = 1];

  // The number of copies (tiles) of the blob to output.
  optional int32 tiles = 2;
}
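
// For example (a sketch, names hypothetical), repeating an N x C x H x W blob
// 4 times along the channel axis yields N x 4C x H x W:
//
//   layer {
//     name: "tile" type: "Tile" bottom: "in" top: "out"
//     tile_param { axis: 1 tiles: 4 }
//   }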

// Message that stores parameters used by ThresholdLayer
message ThresholdParameter {
  // Outputs are 1 for inputs strictly greater than threshold, 0 otherwise.
  optional float threshold = 1 [default = 0];
}

message WindowDataParameter {
  // Specify the data source.
  optional string source = 1;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // Specify if we would like to randomly crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 6 [default = false];
  // Foreground (object) overlap threshold
  optional float fg_threshold = 7 [default = 0.5];
  // Background (non-object) overlap threshold
  optional float bg_threshold = 8 [default = 0.5];
  // Fraction of batch that should be foreground objects
  optional float fg_fraction = 9 [default = 0.25];
  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer)
  optional uint32 context_pad = 10 [default = 0];
  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string crop_mode = 11 [default = "warp"];
  // cache_images: will load all images in memory for faster access
  optional bool cache_images = 12 [default = false];
  // append root_folder to locate images
  optional string root_folder = 13 [default = ""];
}

message SPPParameter {
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional uint32 pyramid_height = 1;
  optional PoolMethod pool = 2 [default = MAX]; // The pooling method
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 6 [default = DEFAULT];
}

// DEPRECATED: use LayerParameter.
message V1LayerParameter {
  repeated string bottom = 2;
  repeated string top = 3;
  optional string name = 4;
  repeated NetStateRule include = 32;
  repeated NetStateRule exclude = 33;
  enum LayerType {
    NONE = 0;
    ABSVAL = 35;
    ACCURACY = 1;
    ARGMAX = 30;
    BNLL = 2;
    CONCAT = 3;
    CONTRASTIVE_LOSS = 37;
    CONVOLUTION = 4;
    DATA = 5;
    DECONVOLUTION = 39;
    DROPOUT = 6;
    DUMMY_DATA = 32;
    EUCLIDEAN_LOSS = 7;
    ELTWISE = 25;
    EXP = 38;
    FLATTEN = 8;
    HDF5_DATA = 9;
    HDF5_OUTPUT = 10;
    HINGE_LOSS = 28;
    IM2COL = 11;
    IMAGE_DATA = 12;
    INFOGAIN_LOSS = 13;
    INNER_PRODUCT = 14;
    LRN = 15;
    MEMORY_DATA = 29;
    MULTINOMIAL_LOGISTIC_LOSS = 16;
    MVN = 34;
    POOLING = 17;
    POWER = 26;
    RELU = 18;
    SIGMOID = 19;
    SIGMOID_CROSS_ENTROPY_LOSS = 27;
    SILENCE = 36;
    SOFTMAX = 20;
    SOFTMAX_LOSS = 21;
    SPLIT = 22;
    SLICE = 33;
    TANH = 23;
    WINDOW_DATA = 24;
    THRESHOLD = 31;
  }
  optional LayerType type = 5;
  repeated BlobProto blobs = 6;
  repeated string param = 1001;
  repeated DimCheckMode blob_share_mode = 1002;
  enum DimCheckMode {
    STRICT = 0;
    PERMISSIVE = 1;
  }
  repeated float blobs_lr = 7;
  repeated float weight_decay = 8;
  repeated float loss_weight = 35;
  optional AccuracyParameter accuracy_param = 27;
  optional ArgMaxParameter argmax_param = 23;
  optional ConcatParameter concat_param = 9;
  optional ContrastiveLossParameter contrastive_loss_param = 40;
  optional ConvolutionParameter convolution_param = 10;
  optional DataParameter data_param = 11;
  optional DropoutParameter dropout_param = 12;
  optional DummyDataParameter dummy_data_param = 26;
  optional EltwiseParameter eltwise_param = 24;
  optional ExpParameter exp_param = 41;
  optional HDF5DataParameter hdf5_data_param = 13;
  optional HDF5OutputParameter hdf5_output_param = 14;
  optional HingeLossParameter hinge_loss_param = 29;
  optional ImageDataParameter image_data_param = 15;
  optional InfogainLossParameter infogain_loss_param = 16;
  optional InnerProductParameter inner_product_param = 17;
  optional LRNParameter lrn_param = 18;
  optional MemoryDataParameter memory_data_param = 22;
  optional MVNParameter mvn_param = 34;
  optional PoolingParameter pooling_param = 19;
  optional PowerParameter power_param = 21;
  optional ReLUParameter relu_param = 30;
  optional SigmoidParameter sigmoid_param = 38;
  optional SoftmaxParameter softmax_param = 39;
  optional SliceParameter slice_param = 31;
  optional TanHParameter tanh_param = 37;
  optional ThresholdParameter threshold_param = 25;
  optional WindowDataParameter window_data_param = 20;
  optional TransformationParameter transform_param = 36;
  optional LossParameter loss_param = 42;
  optional V0LayerParameter layer = 1;
}

// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
// in Caffe.  We keep this message type around for legacy support.
message V0LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the string to specify the layer type

  // Parameters to specify layers with inner products.
  optional uint32 num_output = 3; // The number of outputs for the layer
  optional bool biasterm = 4 [default = true]; // whether to have bias terms
  optional FillerParameter weight_filler = 5; // The filler for the weight
  optional FillerParameter bias_filler = 6; // The filler for the bias

  optional uint32 pad = 7 [default = 0]; // The padding size
  optional uint32 kernelsize = 8; // The kernel size
  optional uint32 group = 9 [default = 1]; // The group size for group conv
  optional uint32 stride = 10 [default = 1]; // The stride
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 11 [default = MAX]; // The pooling method
  optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio

  optional uint32 local_size = 13 [default = 5]; // for local response norm
  optional float alpha = 14 [default = 1.]; // for local response norm
  optional float beta = 15 [default = 0.75]; // for local response norm
  optional float k = 22 [default = 1.];

  // For data layers, specify the data source
  optional string source = 16;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 17 [default = 1];
  optional string meanfile = 18;
  // For data layers, specify the batch size.
  optional uint32 batchsize = 19;
  // For data layers, specify if we would like to randomly crop an image.
  optional uint32 cropsize = 20 [default = 0];
  // For data layers, specify if we want to randomly mirror data.
  optional bool mirror = 21 [default = false];

  // The blobs containing the numeric parameters of the layer
  repeated BlobProto blobs = 50;
  // The ratio that is multiplied on the global learning rate. If you want to
  // set the learning ratio for one blob, you need to set it for all blobs.
  repeated float blobs_lr = 51;
  // The weight decay that is multiplied on the global weight decay.
  repeated float weight_decay = 52;

  // The rand_skip variable is for the data layer to skip a few data points
  // to prevent all asynchronous SGD clients from starting at the same point.
  // The skip point is set as rand_skip * rand(0,1). Note that rand_skip
  // should not be larger than the number of keys in the database.
  optional uint32 rand_skip = 53 [default = 0];

  // Fields related to detection (det_*)
  // foreground (object) overlap threshold
  optional float det_fg_threshold = 54 [default = 0.5];
  // background (non-object) overlap threshold
  optional float det_bg_threshold = 55 [default = 0.5];
  // Fraction of batch that should be foreground objects
  optional float det_fg_fraction = 56 [default = 0.25];

  // optional bool OBSOLETE_can_clobber = 57 [default = true];

  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer)
  optional uint32 det_context_pad = 58 [default = 0];

  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string det_crop_mode = 59 [default = "warp"];

  // For ReshapeLayer, one needs to specify the new dimensions.
  optional int32 new_num = 60 [default = 0];
  optional int32 new_channels = 61 [default = 0];
  optional int32 new_height = 62 [default = 0];
  optional int32 new_width = 63 [default = 0];

  // Whether or not ImageLayer should shuffle the list of files at every epoch.
  // It will also resize images if new_height or new_width are not zero.
  optional bool shuffle_images = 64 [default = false];

  // For ConcatLayer, one needs to specify the dimension for concatenation, and
  // the other dimensions must be the same for all the bottom blobs.
  // By default it will concatenate blobs along the channels dimension.
  optional uint32 concat_dim = 65 [default = 1];

  optional HDF5OutputParameter hdf5_output_param = 1001;
}

message PReLUParameter {
  // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
  // Surpassing Human-Level Performance on ImageNet Classification, 2015.

  // Initial value of a_i. Default is a_i=0.25 for all i.
  optional FillerParameter filler = 1;
  // Whether or not slope parameters are shared across channels.
  optional bool channel_shared = 2 [default = false];
}

message ShuffleChannelParameter {
  // First introduced in
  // "ShuffleNet: An Extremely Efficient Convolutional Neural Network
  // for Mobile Devices"
  optional uint32 group = 1 [default = 1]; // The number of groups
}

message UpsampleParameter {
  // The upsampling factor (this is a fork-specific layer; exact semantics
  // depend on the implementation).
  optional float scale = 1;
}