syntax = "proto2";

package caffe;

// Specifies the shape (dimensions) of a Blob.
message BlobShape {
  repeated int64 dim = 1 [packed = true];
}

// A serialized blob: an optional shape plus its "data" and "diff" arrays,
// each available in float or double precision.
message BlobProto {
  optional BlobShape shape = 7;
  repeated float data = 5 [packed = true];
  repeated float diff = 6 [packed = true];
  repeated double double_data = 8 [packed = true];
  repeated double double_diff = 9 [packed = true];

  // 4D dimensions -- deprecated.  Use "shape" instead.
  optional int32 num = 1 [default = 0];
  optional int32 channels = 2 [default = 0];
  optional int32 height = 3 [default = 0];
  optional int32 width = 4 [default = 0];
}

// The BlobProtoVector is simply a way to pass multiple blobproto instances
// around.
message BlobProtoVector {
  repeated BlobProto blobs = 1;
}

// A single data record: image dimensions, the image bytes (or float data)
// and one or more labels.
message Datum {
  optional int32 channels = 1;
  optional int32 height = 2;
  optional int32 width = 3;
  // the actual image data, in bytes
  optional bytes data = 4;
  optional int32 label = 5;
  // Optionally, the datum could also hold float data.
  repeated float float_data = 6;
  // If true, data contains an encoded image that needs to be decoded
  optional bool encoded = 7 [default = false];
  repeated int32 labels = 8;
  optional float float_label = 9;
  repeated float float_labels = 10;
  optional bytes byte_labels = 11;
}

// A bounding box described by its minimum and maximum x/y coordinates.
message MTCNNBBox {
  optional float xmin = 1;
  optional float ymin = 2;
  optional float xmax = 3;
  optional float ymax = 4;
}

// A Datum together with a region of interest and a list of float values.
// NOTE(review): `pts` presumably holds landmark coordinates -- confirm.
message MTCNNDatum {
  optional Datum datum = 1;
  optional MTCNNBBox roi = 2;
  repeated float pts = 3;
  //optional int32 labels_size = 4 [default = 0];
}

// The label (display) name and label id.
message LabelMapItem {
  // Both name and label are required.
  optional string name = 1;
  optional int32 label = 2;
  // display_name is optional.
  optional string display_name = 3;
}

// A list of LabelMapItem entries.
message LabelMap {
  repeated LabelMapItem item = 1;
}

// Sample a bbox in the normalized space [0, 1] with provided constraints.
message Sampler {
  // Minimum scale of the sampled bbox.
  optional float min_scale = 1 [default = 1.];
  // Maximum scale of the sampled bbox.
  optional float max_scale = 2 [default = 1.];

  // Minimum aspect ratio of the sampled bbox.
  optional float min_aspect_ratio = 3 [default = 1.];
  // Maximum aspect ratio of the sampled bbox.
  optional float max_aspect_ratio = 4 [default = 1.];
}

// Constraints for selecting sampled bbox.
message SampleConstraint {
  // Minimum Jaccard overlap between sampled bbox and all bboxes in
  // AnnotationGroup.
  optional float min_jaccard_overlap = 1;
  // Maximum Jaccard overlap between sampled bbox and all bboxes in
  // AnnotationGroup.
  optional float max_jaccard_overlap = 2;

  // Minimum coverage of sampled bbox by all bboxes in AnnotationGroup.
  optional float min_sample_coverage = 3;
  // Maximum coverage of sampled bbox by all bboxes in AnnotationGroup.
  optional float max_sample_coverage = 4;

  // Minimum coverage of all bboxes in AnnotationGroup by sampled bbox.
  optional float min_object_coverage = 5;
  // Maximum coverage of all bboxes in AnnotationGroup by sampled bbox.
  optional float max_object_coverage = 6;
}

// Sample a batch of bboxes with provided constraints.
message BatchSampler {
  // Use original image as the source for sampling.
  optional bool use_original_image = 1 [default = true];

  // Constraints for sampling bbox.
  optional Sampler sampler = 2;

  // Constraints for determining if a sampled bbox is positive or negative.
  optional SampleConstraint sample_constraint = 3;

  // If provided, break when found certain number of samples satisfying the
  // sample_constraint.
  optional uint32 max_sample = 4;

  // Maximum number of trials for sampling to avoid infinite loop.
  optional uint32 max_trials = 5 [default = 100];
}

// Condition for emitting annotations.
message EmitConstraint {
  enum EmitType {
    CENTER = 0;
    MIN_OVERLAP = 1;
  }
  optional EmitType emit_type = 1 [default = CENTER];
  // If emit_type is MIN_OVERLAP, provide the emit_overlap.
  optional float emit_overlap = 2;
}

// The normalized bounding box [0, 1] w.r.t. the input image size.
message NormalizedBBox {
  optional float xmin = 1;
  optional float ymin = 2;
  optional float xmax = 3;
  optional float ymax = 4;
  optional int32 label = 5;
  optional bool difficult = 6;
  optional float score = 7;
  optional float size = 8;
}

// Annotation for each object instance.
message Annotation {
  optional int32 instance_id = 1 [default = 0];
  optional NormalizedBBox bbox = 2;
}

// Group of annotations for a particular label.
message AnnotationGroup {
  optional int32 group_label = 1;
  repeated Annotation annotation = 2;
}

// An extension of Datum which contains "rich" annotations.
message AnnotatedDatum {
  enum AnnotationType {
    BBOX = 0;
  }
  optional Datum datum = 1;
  // If there are "rich" annotations, specify the type of annotation.
  // Currently it only supports bounding box.
  // If there are no "rich" annotations, use label in datum instead.
  optional AnnotationType type = 2;
  // Each group contains annotation for a particular class.
  repeated AnnotationGroup annotation_group = 3;
}

// Describes how a parameter blob is filled: the filler type plus the
// settings read by the individual filler types.
message FillerParameter {
  // The filler type.
  optional string type = 1 [default = 'constant'];
  optional float value = 2 [default = 0];  // the value in constant filler
  optional float min = 3 [default = 0];  // the min value in uniform filler
  optional float max = 4 [default = 1];  // the max value in uniform filler
  optional float mean = 5 [default = 0];  // the mean value in Gaussian filler
  optional float std = 6 [default = 1];  // the std value in Gaussian filler
  // The expected number of non-zero output weights for a given input in
  // Gaussian filler -- the default -1 means don't perform sparsification.
  optional int32 sparse = 7 [default = -1];
  // Normalize the filler variance by fan_in, fan_out, or their average.
  // Applies to 'xavier' and 'msra' fillers.
  enum VarianceNorm {
    FAN_IN = 0;
    FAN_OUT = 1;
    AVERAGE = 2;
  }
  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}

// Describes a whole network: its name, (deprecated) inputs, state and layers.
message NetParameter {
  optional string name = 1;  // consider giving the network a name
  // DEPRECATED. See InputParameter. The input blobs to the network.
  repeated string input = 3;
  // DEPRECATED. See InputParameter. The shape of the input blobs.
  repeated BlobShape input_shape = 8;

  // 4D input dimensions -- deprecated.  Use "input_shape" instead.
  // If specified, for each input blob there should be four
  // values specifying the num, channels, height and width of the input blob.
  // Thus, there should be a total of (4 * #input) numbers.
  repeated int32 input_dim = 4;

  // Whether the network will force every layer to carry out backward operation.
  // If set False, then whether to carry out backward is determined
  // automatically according to the net structure and learning rates.
  optional bool force_backward = 5 [default = false];
  // The current "state" of the network, including the phase, level, and stage.
  // Some layers may be included/excluded depending on this state and the states
  // specified in the layers' include and exclude fields.
  optional NetState state = 6;

  // Print debugging information about results while running Net::Forward,
  // Net::Backward, and Net::Update.
  optional bool debug_info = 7 [default = false];

  // The layers that make up the net.  Each of their configurations, including
  // connectivity and behavior, is specified as a LayerParameter.
  repeated LayerParameter layer = 100;  // ID 100 so layers are printed last.

  // DEPRECATED: use 'layer' instead.
  repeated V1LayerParameter layers = 2;
}

// NOTE
// Update the next available ID when you add a new SolverParameter field.
//
// SolverParameter next available ID: 46 (last added: layer_wise_reduce)
// ID 43 is not used by any field in this message.
// NOTE(review): confirm 43 was never used by an earlier revision before
// assigning it to a new field.
message SolverParameter {
  //////////////////////////////////////////////////////////////////////////////
  // Specifying the train and test networks
  //
  // Exactly one train net must be specified using one of the following fields:
  //     train_net_param, train_net, net_param, net
  // One or more test nets may be specified using any of the following fields:
  //     test_net_param, test_net, net_param, net
  // If more than one test net field is specified (e.g., both net and
  // test_net are specified), they will be evaluated in the field order given
  // above: (1) test_net_param, (2) test_net, (3) net_param/net.
  // A test_iter must be specified for each test_net.
  // A test_level and/or a test_stage may also be specified for each test_net.
  //////////////////////////////////////////////////////////////////////////////

  // Proto filename for the train net, possibly combined with one or more
  // test nets.
  optional string net = 24;
  // Inline train net param, possibly combined with one or more test nets.
  optional NetParameter net_param = 25;

  optional string train_net = 1;  // Proto filename for the train net.
  repeated string test_net = 2;  // Proto filenames for the test nets.
  optional NetParameter train_net_param = 21;  // Inline train net params.
  repeated NetParameter test_net_param = 22;  // Inline test net params.

  // The states for the train/test nets. Must be unspecified or
  // specified once per net.
  //
  // By default, train_state will have phase = TRAIN,
  // and all test_state's will have phase = TEST.
  // Other defaults are set according to the NetState defaults.
  optional NetState train_state = 26;
  repeated NetState test_state = 27;

  // Evaluation type.
  optional string eval_type = 41 [default = "classification"];
  // ap_version: different ways of computing Average Precision.
  //    Check https://sanchom.wordpress.com/tag/average-precision/ for details.
  //    11point: the 11-point interpolated average precision. Used in VOC2007.
  //    MaxIntegral: maximally interpolated AP. Used in VOC2012/ILSVRC.
  //    Integral: the natural integral of the precision-recall curve.
  optional string ap_version = 42 [default = "Integral"];
  // If true, display per class result.
  optional bool show_per_class_result = 44 [default = false];

  // The number of iterations for each test net.
  repeated int32 test_iter = 3;

  // The number of iterations between two testing phases.
  optional int32 test_interval = 4 [default = 0];
  optional bool test_compute_loss = 19 [default = false];
  // If true, run an initial test pass before the first iteration,
  // ensuring memory availability and printing the starting value of the loss.
  optional bool test_initialization = 32 [default = true];
  optional float base_lr = 5;  // The base learning rate
  // the number of iterations between displaying info. If display = 0, no info
  // will be displayed.
  optional int32 display = 6;
  // Display the loss averaged over the last average_loss iterations
  optional int32 average_loss = 33 [default = 1];
  optional int32 max_iter = 7;  // the maximum number of iterations
  // accumulate gradients over `iter_size` x `batch_size` instances
  optional int32 iter_size = 36 [default = 1];

  // The learning rate decay policy. The currently implemented learning rate
  // policies are as follows:
  //    - fixed: always return base_lr.
  //    - step: return base_lr * gamma ^ (floor(iter / step))
  //    - exp: return base_lr * gamma ^ iter
  //    - inv: return base_lr * (1 + gamma * iter) ^ (- power)
  //    - multistep: similar to step but it allows non uniform steps defined by
  //      stepvalue
  //    - poly: the effective learning rate follows a polynomial decay, to be
  //      zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power)
  //    - sigmoid: the effective learning rate follows a sigmoid decay
  //      return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize))))
  //
  // where base_lr, max_iter, gamma, step, stepvalue and power are defined
  // in the solver parameter protocol buffer, and iter is the current iteration.
  optional string lr_policy = 8;
  optional float gamma = 9;  // The parameter to compute the learning rate.
  optional float power = 10;  // The parameter to compute the learning rate.
  optional float momentum = 11;  // The momentum value.
  optional float weight_decay = 12;  // The weight decay.
  // regularization types supported: L1 and L2
  // controlled by weight_decay
  optional string regularization_type = 29 [default = "L2"];
  // the stepsize for learning rate policy "step"
  optional int32 stepsize = 13;
  // the stepsize for learning rate policy "multistep"
  repeated int32 stepvalue = 34;

  // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm,
  // whenever their actual L2 norm is larger.
  optional float clip_gradients = 35 [default = -1];

  optional int32 snapshot = 14 [default = 0];  // The snapshot interval
  optional string snapshot_prefix = 15;  // The prefix for the snapshot.
  // whether to snapshot diff in the results or not. Snapshotting diff will help
  // debugging but the final protocol buffer size will be much larger.
  optional bool snapshot_diff = 16 [default = false];
  enum SnapshotFormat {
    HDF5 = 0;
    BINARYPROTO = 1;
  }
  optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO];
  // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU by default.
  enum SolverMode {
    CPU = 0;
    GPU = 1;
  }
  optional SolverMode solver_mode = 17 [default = GPU];
  // the device_id that will be used in GPU mode. Use device_id = 0 by default.
  optional int32 device_id = 18 [default = 0];
  // If non-negative, the seed with which the Solver will initialize the Caffe
  // random number generator -- useful for reproducible results. Otherwise,
  // (and by default) initialize using a seed derived from the system clock.
  optional int64 random_seed = 20 [default = -1];

  // type of the solver
  optional string type = 40 [default = "SGD"];

  // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam
  optional float delta = 31 [default = 1e-8];
  // parameters for the Adam solver
  optional float momentum2 = 39 [default = 0.999];

  // RMSProp decay value
  // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t)
  optional float rms_decay = 38 [default = 0.99];

  // If true, print information about the state of the net that may help with
  // debugging learning problems.
  optional bool debug_info = 23 [default = false];

  // If false, don't save a snapshot after training finishes.
  optional bool snapshot_after_train = 28 [default = true];

  // DEPRECATED: old solver enum types, use string instead
  enum SolverType {
    SGD = 0;
    NESTEROV = 1;
    ADAGRAD = 2;
    RMSPROP = 3;
    ADADELTA = 4;
    ADAM = 5;
  }
  // DEPRECATED: use type instead of solver_type
  optional SolverType solver_type = 30 [default = SGD];

  // Overlap compute and communication for data parallel training
  optional bool layer_wise_reduce = 45 [default = true];
}

// A message that stores the solver snapshots
message SolverState {
  optional int32 iter = 1;  // The current iteration
  optional string learned_net = 2;  // The file that stores the learned net.
  repeated BlobProto history = 3;  // The history for sgd solvers
  optional int32 current_step = 4 [default = 0];  // The current step for learning rate
}

// The phase a net (or layer) runs in.
// NOTE(review): QUAN and INT8 look like fork-specific quantization phases --
// confirm their semantics with the code that reads them.
enum Phase {
  TRAIN = 0;
  TEST = 1;
  QUAN = 2;
  INT8 = 3;
}

// The state of a network: its phase, level and stages
// (see NetParameter.state).
message NetState {
  optional Phase phase = 1 [default = TEST];
  optional int32 level = 2 [default = 0];
  repeated string stage = 3;
}

// A rule that a NetState is matched against (see LayerParameter.include and
// LayerParameter.exclude).
message NetStateRule {
  // Set phase to require the NetState have a particular phase (TRAIN or TEST)
  // to meet this rule.
  optional Phase phase = 1;

  // Set the minimum and/or maximum levels in which the layer should be used.
  // Leave undefined to meet the rule regardless of level.
  optional int32 min_level = 2;
  optional int32 max_level = 3;

  // Customizable sets of stages to include or exclude.
  // The net must have ALL of the specified stages and NONE of the specified
  // "not_stage"s to meet the rule.
  // (Use multiple NetStateRules to specify conjunctions of stages.)
  repeated string stage = 4;
  repeated string not_stage = 5;
}

// Specifies training parameters (multipliers on global learning constants,
// and the name and other settings used for weight sharing).
message ParamSpec {
  // The names of the parameter blobs -- useful for sharing parameters among
  // layers, but never required otherwise.  To share a parameter between two
  // layers, give it a (non-empty) name.
  optional string name = 1;

  // Whether to require shared weights to have the same shape, or just the same
  // count -- defaults to STRICT if unspecified.
  optional DimCheckMode share_mode = 2;
  enum DimCheckMode {
    // STRICT (default) requires that num, channels, height, width each match.
    STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to match.
    PERMISSIVE = 1;
  }

  // The multiplier on the global learning rate for this parameter.
  optional float lr_mult = 3 [default = 1.0];

  // The multiplier on the global weight decay for this parameter.
  optional float decay_mult = 4 [default = 1.0];
}

// NOTE
// Update the next available ID when you add a new LayerParameter field.
//
// LayerParameter next available layer-specific ID: 149 (last added: Pooling3DParameter)
// Besides 100-148, IDs 200-212, 2201-2203 and 2210 are already in use below;
// do not reuse them.
message LayerParameter {
  optional string name = 1;  // the layer name
  optional string type = 2;  // the layer type
  repeated string bottom = 3;  // the name of each bottom blob
  repeated string top = 4;  // the name of each top blob

  // The train / test phase for computation.
  optional Phase phase = 10;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 5;

  // Specifies training parameters (multipliers on global learning constants,
  // and the name and other settings used for weight sharing).
  repeated ParamSpec param = 6;

  // The blobs containing the numeric parameters of the layer.
  repeated BlobProto blobs = 7;

  // Specifies whether to backpropagate to each bottom. If unspecified,
  // Caffe will automatically infer whether each input needs backpropagation
  // to compute parameter gradients. If set to true for some inputs,
  // backpropagation to those inputs is forced; if set false for some inputs,
  // backpropagation to those inputs is skipped.
  //
  // The size must be either 0 or equal to the number of bottoms.
  repeated bool propagate_down = 11;

  // Rules controlling whether and when a layer is included in the network,
  // based on the current NetState.  You may specify a non-zero number of rules
  // to include OR exclude, but not both.  If no include or exclude rules are
  // specified, the layer is always included.  If the current NetState meets
  // ANY (i.e., one or more) of the specified rules, the layer is
  // included/excluded.
  repeated NetStateRule include = 8;
  repeated NetStateRule exclude = 9;

  // Parameters for data pre-processing.
  optional TransformationParameter transform_param = 100;

  // Parameters shared by loss layers.
  optional LossParameter loss_param = 101;

  // Layer type-specific parameters.
  //
  // Note: certain layers may have more than one computational engine
  // for their implementation. These layers include an Engine type and
  // engine parameter for selecting the implementation.
  // The default for the engine is set by the ENGINE switch at compile-time.
  optional AccuracyParameter accuracy_param = 102;
  optional AnnotatedDataParameter annotated_data_param = 200;
  optional ArgMaxParameter argmax_param = 103;
  optional BatchNormParameter batch_norm_param = 139;
  optional BiasParameter bias_param = 141;
  optional ConcatParameter concat_param = 104;
  optional ContrastiveLossParameter contrastive_loss_param = 105;
  optional ConvolutionParameter convolution_param = 106;
  optional Convolution3DParameter convolution3d_param = 147;
  optional CropParameter crop_param = 144;
  optional DataParameter data_param = 107;
  optional DetectionEvaluateParameter detection_evaluate_param = 205;
  optional DetectionOutputParameter detection_output_param = 204;
  optional DropoutParameter dropout_param = 108;
  optional DummyDataParameter dummy_data_param = 109;
  optional EltwiseParameter eltwise_param = 110;
  optional ELUParameter elu_param = 140;
  optional EmbedParameter embed_param = 137;
  optional ExpParameter exp_param = 111;
  optional FlattenParameter flatten_param = 135;
  optional HDF5DataParameter hdf5_data_param = 112;
  optional HDF5OutputParameter hdf5_output_param = 113;
  optional HingeLossParameter hinge_loss_param = 114;
  optional ImageDataParameter image_data_param = 115;
  optional InfogainLossParameter infogain_loss_param = 116;
  optional InnerProductParameter inner_product_param = 117;
  optional InputParameter input_param = 143;
  optional LogParameter log_param = 134;
  optional LRNParameter lrn_param = 118;
  optional LSTMParameter lstm_param = 210;
  optional Im2seqParameter im2seq_param = 207;
  optional Seq2outParameter seq2out_param = 208;
  optional ImgSizeParameter img_size_param = 209;
  optional MemoryDataParameter memory_data_param = 119;
  optional MultiBoxLossParameter multibox_loss_param = 201;
  optional MVNParameter mvn_param = 120;
  optional NormalizeParameter norm_param = 206;
  optional ParameterParameter parameter_param = 145;
  optional PermuteParameter permute_param = 202;
  optional PoolingParameter pooling_param = 121;
  optional Pooling3DParameter pooling3d_param = 148;
  optional PowerParameter power_param = 122;
  optional PReLUParameter prelu_param = 131;
  optional PriorBoxParameter prior_box_param = 203;
  optional PythonParameter python_param = 130;
  optional RecurrentParameter recurrent_param = 146;
  optional ReductionParameter reduction_param = 136;
  optional ReLUParameter relu_param = 123;
  optional ReshapeParameter reshape_param = 133;
  optional ScaleParameter scale_param = 142;
  optional SigmoidParameter sigmoid_param = 124;
  optional SoftmaxParameter softmax_param = 125;
  optional SPPParameter spp_param = 132;
  optional SliceParameter slice_param = 126;
  optional TanHParameter tanh_param = 127;
  optional ThresholdParameter threshold_param = 128;
  optional TileParameter tile_param = 138;
  optional WindowDataParameter window_data_param = 129;
  optional CenterLossParameter center_loss_param = 211;
  optional ShuffleChannelParameter shuffle_channel_param = 212;

  optional InterpParameter interp_param = 2210;
  optional ROIPoolingParameter roi_pooling_param = 2201;
  optional ClipParameter clip_param = 2202;
  optional UpsampleParameter upsample_param = 2203;
}

// Message that stores parameters used by ClipLayer
// NOTE(review): both fields are `required`; they are kept as-is because
// relaxing them changes the behavior of generated code for existing users.
message ClipParameter {
  required float min = 1;
  required float max = 2;
}

// Message that stores parameters used by ROIPoolingLayer
message ROIPoolingParameter {
  optional uint32 pooled_h = 1 [default = 0];  // The pooled output height
  optional uint32 pooled_w = 2 [default = 0];  // The pooled output width
  // Multiplicative spatial scale factor to translate ROI coords from their
  // input scale to the scale used when pooling
  optional float spatial_scale = 3 [default = 1];
}

// Message that stores the output size, zoom/shrink factors and padding
// selected through LayerParameter.interp_param.
message InterpParameter {
  optional int32 height = 1 [default = 0];  // Height of output
  optional int32 width = 2 [default = 0];  // Width of output
  optional int32 zoom_factor = 3 [default = 1];  // zoom factor
  optional int32 shrink_factor = 4 [default = 1];  // shrink factor
  optional int32 pad_beg = 5 [default = 0];  // padding at begin of input
  optional int32 pad_end = 6 [default = 0];  // padding at end of input
}

// Message that stores parameters selected through LayerParameter.lstm_param
message LSTMParameter {
  optional uint32 num_output = 1;  // The number of outputs for the layer
  optional float clipping_threshold = 2 [default = 0.0];
  optional FillerParameter weight_filler = 3;  // The filler for weight
  optional FillerParameter bias_filler = 4;  // The filler for the bias
  optional uint32 batch_size = 5 [default = 1];
  // NOTE(review): paramq, scale_in, scale_out, a_min and a_max look like
  // quantization settings (a_min/a_max default to the int8 range) -- confirm.
  optional uint32 paramq = 6;
  optional float scale_in = 7;
  optional float scale_out = 8;
  optional int32 a_min = 9 [default = -128];
  optional int32 a_max = 10 [default = 127];
}

// Message that stores parameters selected through LayerParameter.im2seq_param
message Im2seqParameter {
  enum Direction {
    FORWARD = 0;
    REVERSED = 1;
    TOPDOWN = 2;
    BOTTOMUP = 3;
  }
  optional Direction direction = 1 [default = FORWARD];
  optional uint32 size = 2 [default = 1];
  optional uint32 stride = 3 [default = 1];
  optional bool line2time_loss = 4 [default = false];
}

// Message that stores parameters selected through
// LayerParameter.center_loss_param
message CenterLossParameter {
  optional uint32 num_output = 1;  // The number of outputs for the layer
  optional FillerParameter center_filler = 2;  // The filler for the centers
  // The first axis to be lumped into a single inner product computation;
  // all preceding axes are retained in the output.
  // May be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 3 [default = 1];
  optional string distance_type = 29 [default = "L2"];
}

// Message that stores parameters selected through LayerParameter.seq2out_param
// (currently it has no fields)
message Seq2outParameter {
}

// Message that stores parameters used for variable-size images
message ImgSizeParameter {
  optional float x_scaling = 1 [default = 1];
  optional float y_scaling = 2 [default = 1];
  // Parameters used to restore pretrain layers (ocr team defined)
  optional string pretrained_file = 500;
  optional string pretrained_layer_name = 501;
  optional string pretrained_layer_bin = 502;
}

// Message that stores parameters used to apply transformation
// to the data layer's data
message TransformationParameter {
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 1 [default = 1];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 2 [default = false];
  // Specify if we would like to randomly crop an image.
  optional uint32 crop_size = 3 [default = 0];
  optional uint32 crop_h = 11 [default = 0];
  optional uint32 crop_w = 12 [default = 0];

  // mean_file and mean_value cannot be specified at the same time
  optional string mean_file = 4;
  // if specified can be repeated once (would subtract it from all the channels)
  // or can be repeated the same number of times as channels
  // (would subtract them from the corresponding channel)
  repeated float mean_value = 5;
  // Force the decoded image to have 3 color channels.
  optional bool force_color = 6 [default = false];
  // Force the decoded image to have 1 color channel.
  optional bool force_gray = 7 [default = false];
  // Resize policy
  optional ResizeParameter resize_param = 8;
  // Noise policy
  optional NoiseParameter noise_param = 9;
  // Distortion policy
  optional DistortionParameter distort_param = 13;
  // Expand policy
  optional ExpansionParameter expand_param = 14;
  // Constraint for emitting the annotation after transformation.
  optional EmitConstraint emit_constraint = 10;
}

// Message that stores parameters used by data transformer for resize policy
message ResizeParameter {
  // Probability of using this resize policy
  optional float prob = 1 [default = 1];

  enum Resize_mode {
    WARP = 1;
    FIT_SMALL_SIZE = 2;
    FIT_LARGE_SIZE_AND_PAD = 3;
  }
  optional Resize_mode resize_mode = 2 [default = WARP];
  optional uint32 height = 3 [default = 0];
  optional uint32 width = 4 [default = 0];
  // A parameter used to update bbox in FIT_SMALL_SIZE mode.
  optional uint32 height_scale = 8 [default = 0];
  optional uint32 width_scale = 9 [default = 0];

  enum Pad_mode {
    CONSTANT = 1;
    MIRRORED = 2;
    REPEAT_NEAREST = 3;
  }
  // Padding mode for FIT_LARGE_SIZE_AND_PAD mode and object centering
  optional Pad_mode pad_mode = 5 [default = CONSTANT];
  // if specified can be repeated once (would fill all the channels)
  // or can be repeated the same number of times as channels
  // (would apply them to the corresponding channels)
  repeated float pad_value = 6;

  enum Interp_mode {  // Same as in OpenCV
    LINEAR = 1;
    AREA = 2;
    NEAREST = 3;
    CUBIC = 4;
    LANCZOS4 = 5;
  }
  // interpolation for resizing
  repeated Interp_mode interp_mode = 7;
}

// Settings for salt-and-pepper noise (see NoiseParameter.saltpepper_param).
message SaltPepperParameter {
  // Percentage of pixels
  optional float fraction = 1 [default = 0];
  repeated float value = 2;
}

// Message that stores parameters used by data transformer for transformation
// policy
message NoiseParameter {
  // Probability of using this noise policy
  optional float prob = 1 [default = 0];
  // Histogram equalized
  optional bool hist_eq = 2 [default = false];
  // Color inversion
  optional bool inverse = 3 [default = false];
  // Grayscale
  optional bool decolorize = 4 [default = false];
  // Gaussian blur
  optional bool gauss_blur = 5 [default = false];

  // JPEG compression quality (-1 = no compression)
  optional float jpeg = 6 [default = -1];

  // Posterization
  optional bool posterize = 7 [default = false];

  // Erosion
  optional bool erode = 8 [default = false];

  // Salt-and-pepper noise
  optional bool saltpepper = 9 [default = false];

  optional SaltPepperParameter saltpepper_param = 10;

  // Local histogram equalization
  optional bool clahe = 11 [default = false];

  // Color space conversion
  optional bool convert_to_hsv = 12 [default = false];

  // Color space conversion
  optional bool convert_to_lab = 13 [default = false];
}

// Message that stores parameters used by data transformer for distortion policy
message DistortionParameter {
  // The probability of adjusting brightness.
  optional float brightness_prob = 1 [default = 0.0];
  // Amount to add to the pixel values within [-delta, delta].
  // The possible value is within [0, 255]. Recommend 32.
  optional float brightness_delta = 2 [default = 0.0];

  // The probability of adjusting contrast.
  optional float contrast_prob = 3 [default = 0.0];
  // Lower bound for random contrast factor. Recommend 0.5.
  optional float contrast_lower = 4 [default = 0.0];
  // Upper bound for random contrast factor. Recommend 1.5.
  optional float contrast_upper = 5 [default = 0.0];

  // The probability of adjusting hue.
  optional float hue_prob = 6 [default = 0.0];
  // Amount to add to the hue channel within [-delta, delta].
  // The possible value is within [0, 180]. Recommend 36.
  optional float hue_delta = 7 [default = 0.0];

  // The probability of adjusting saturation.
  optional float saturation_prob = 8 [default = 0.0];
  // Lower bound for the random saturation factor. Recommend 0.5.
  optional float saturation_lower = 9 [default = 0.0];
  // Upper bound for the random saturation factor. Recommend 1.5.
  optional float saturation_upper = 10 [default = 0.0];

  // The probability of randomly ordering the image channels.
  optional float random_order_prob = 11 [default = 0.0];
}

// Message that stores parameters used by data transformer for expansion policy
message ExpansionParameter {
  // Probability of using this expansion policy
  optional float prob = 1 [default = 1];

  // The ratio to expand the image.
  optional float max_expand_ratio = 2 [default = 1.];
}

// Message that stores parameters shared by loss layers
message LossParameter {
  // If specified, ignore instances with the given label.
  optional int32 ignore_label = 1;
  // How to normalize the loss for loss layers that aggregate across batches,
  // spatial dimensions, or other dimensions.  Currently only implemented in
  // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers.
  enum NormalizationMode {
    // Divide by the number of examples in the batch times spatial dimensions.
    // Outputs that receive the ignore label will NOT be ignored in computing
    // the normalization factor.
    FULL = 0;
    // Divide by the total number of output locations that do not take the
    // ignore_label.  If ignore_label is not set, this behaves like FULL.
    VALID = 1;
    // Divide by the batch size.
    BATCH_SIZE = 2;
    // Do not normalize the loss.
    NONE = 3;
  }
  // For historical reasons, the default normalization for
  // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID.
  optional NormalizationMode normalization = 3 [default = VALID];
  // Deprecated.  Ignored if normalization is specified.  If normalization
  // is not specified, then setting this to false will be equivalent to
  // normalization = BATCH_SIZE to be consistent with previous behavior.
  optional bool normalize = 2;
  optional bool is_num_scale = 4 [default = true];
}

// Messages that store parameters used by individual layer types follow, in
// alphabetical order.

message AccuracyParameter {
  // When computing accuracy, count as correct by comparing the true label to
  // the top k scoring classes.  By default, only compare to the top scoring
  // class (i.e. argmax).
  optional uint32 top_k = 1 [default = 1];

  // The "label" axis of the prediction blob, whose argmax corresponds to the
  // predicted label -- may be negative to index from the end (e.g., -1 for the
  // last axis).  For example, if axis == 1 and the predictions are
  // (N x C x H x W), the label blob is expected to contain N*H*W ground truth
  // labels with integer values in {0, 1, ..., C-1}.
  optional int32 axis = 2 [default = 1];

  // If specified, ignore instances with the given label.
  optional int32 ignore_label = 3;
}

message AnnotatedDataParameter {
  // Define the sampler.
  repeated BatchSampler batch_sampler = 1;
  // Store label name and label id in LabelMap format.
  optional string label_map_file = 2;
  // If provided, it will replace the AnnotationType stored in each
  // AnnotatedDatum.
  optional AnnotatedDatum.AnnotationType anno_type = 3;
}

message ArgMaxParameter {
  // If true produce pairs (argmax, maxval)
  optional bool out_max_val = 1 [default = false];
  optional uint32 top_k = 2 [default = 1];
  // The axis along which to maximise -- may be negative to index from the
  // end (e.g., -1 for the last axis).
879 // By default ArgMaxLayer maximizes over the flattened trailing dimensions 880 // for each index of the first / num dimension. 881 optional int32 axis = 3; 882 optional bool softmax_threshold = 4 [default = false]; 883} 884 885message ConcatParameter { 886 // The axis along which to concatenate -- may be negative to index from the 887 // end (e.g., -1 for the last axis). Other axes must have the 888 // same dimension for all the bottom blobs. 889 // By default, ConcatLayer concatenates blobs along the "channels" axis (1). 890 optional int32 axis = 2 [default = 1]; 891 892 // DEPRECATED: alias for "axis" -- does not support negative indexing. 893 optional uint32 concat_dim = 1 [default = 1]; 894} 895 896message BatchNormParameter { 897 // If false, normalization is performed over the current mini-batch 898 // and global statistics are accumulated (but not yet used) by a moving 899 // average. 900 // If true, those accumulated mean and variance values are used for the 901 // normalization. 902 // By default, it is set to false when the network is in the training 903 // phase and true when the network is in the testing phase. 904 optional bool use_global_stats = 1; 905 // What fraction of the moving average remains each iteration? 906 // Smaller values make the moving average decay faster, giving more 907 // weight to the recent values. 908 // Each iteration updates the moving average @f$S_{t-1}@f$ with the 909 // current mean @f$ Y_t @f$ by 910 // @f$ S_t = (1-\beta)Y_t + \beta \cdot S_{t-1} @f$, where @f$ \beta @f$ 911 // is the moving_average_fraction parameter. 912 optional float moving_average_fraction = 2 [default = .999]; 913 // Small value to add to the variance estimate so that we don't divide by 914 // zero. 
915 optional float eps = 3 [default = 1e-5]; 916 optional bool use_weight_bias = 5 [default = true]; 917 optional bool bias_term = 6 [default = true]; // whether to have bias terms 918 optional FillerParameter filler = 7; // The filler for the weight 919 optional FillerParameter bias_filler = 8; // The filler for the bias 920 optional FillerParameter scale_filler = 9; // The filler for the bias 921} 922 923message BiasParameter { 924 // The first axis of bottom[0] (the first input Blob) along which to apply 925 // bottom[1] (the second input Blob). May be negative to index from the end 926 // (e.g., -1 for the last axis). 927 // 928 // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 929 // top[0] will have the same shape, and bottom[1] may have any of the 930 // following shapes (for the given value of axis): 931 // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 932 // (axis == 1 == -3) 3; 3x40; 3x40x60 933 // (axis == 2 == -2) 40; 40x60 934 // (axis == 3 == -1) 60 935 // Furthermore, bottom[1] may have the empty shape (regardless of the value of 936 // "axis") -- a scalar bias. 937 optional int32 axis = 1 [default = 1]; 938 939 // (num_axes is ignored unless just one bottom is given and the bias is 940 // a learned parameter of the layer. Otherwise, num_axes is determined by the 941 // number of axes by the second bottom.) 942 // The number of axes of the input (bottom[0]) covered by the bias 943 // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 944 // Set num_axes := 0, to add a zero-axis Blob: a scalar. 945 optional int32 num_axes = 2 [default = 1]; 946 947 // (filler is ignored unless just one bottom is given and the bias is 948 // a learned parameter of the layer.) 949 // The initialization for the learned bias parameter. 950 // Default is the zero (0) initialization, resulting in the BiasLayer 951 // initially performing the identity operation. 
952 optional FillerParameter filler = 3; 953} 954 955message ContrastiveLossParameter { 956 // margin for dissimilar pair 957 optional float margin = 1 [default = 1.0]; 958 // The first implementation of this cost did not exactly match the cost of 959 // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. 960 // legacy_version = false (the default) uses (margin - d)^2 as proposed in the 961 // Hadsell paper. New models should probably use this version. 962 // legacy_version = true uses (margin - d^2). This is kept to support / 963 // reproduce existing models and results 964 optional bool legacy_version = 2 [default = false]; 965} 966 967message ConvolutionParameter { 968 optional uint32 num_output = 1; // The number of outputs for the layer 969 optional bool bias_term = 2 [default = true]; // whether to have bias terms 970 971 // Pad, kernel size, and stride are all given as a single value for equal 972 // dimensions in all spatial dimensions, or once per spatial dimension. 973 repeated uint32 pad = 3; // The padding size; defaults to 0 974 repeated uint32 kernel_size = 4; // The kernel size 975 repeated uint32 stride = 6; // The stride; defaults to 1 976 // Factor used to dilate the kernel, (implicitly) zero-filling the resulting 977 // holes. (Kernel dilation is sometimes referred to by its use in the 978 // algorithme à trous from Holschneider et al. 1987.) 979 repeated uint32 dilation = 18; // The dilation; defaults to 1 980 981 // For 2D convolution only, the *_h and *_w versions may also be used to 982 // specify both spatial dimensions. 
983 optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) 984 optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) 985 optional uint32 kernel_h = 11; // The kernel height (2D only) 986 optional uint32 kernel_w = 12; // The kernel width (2D only) 987 optional uint32 stride_h = 13; // The stride height (2D only) 988 optional uint32 stride_w = 14; // The stride width (2D only) 989 990 optional uint32 group = 5 [default = 1]; // The group size for group conv 991 992 optional FillerParameter weight_filler = 7; // The filler for the weight 993 optional FillerParameter bias_filler = 8; // The filler for the bias 994 enum Engine { 995 DEFAULT = 0; 996 CAFFE = 1; 997 CUDNN = 2; 998 CUDNN_FORWARD = 3; 999 } 1000 optional Engine engine = 15 [default = DEFAULT]; 1001 1002 // The axis to interpret as "channels" when performing convolution. 1003 // Preceding dimensions are treated as independent inputs; 1004 // succeeding dimensions are treated as "spatial". 1005 // With (N, C, H, W) inputs, and axis == 1 (the default), we perform 1006 // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for 1007 // groups g>1) filters across the spatial axes (H, W) of the input. 1008 // With (N, C, D, H, W) inputs, and axis == 1, we perform 1009 // N independent 3D convolutions, sliding (C/g)-channels 1010 // filters across the spatial axes (D, H, W) of the input. 1011 optional int32 axis = 16 [default = 1]; 1012 1013 // Whether to force use of the general ND convolution, even if a specific 1014 // implementation for blobs of the appropriate number of spatial dimensions 1015 // is available. (Currently, there is only a 2D-specific convolution 1016 // implementation; for input blobs with num_axes != 2, this option is 1017 // ignored and the ND implementation will be used.) 
1018 optional bool force_nd_im2col = 17 [default = false]; 1019 optional uint32 paramq = 19; 1020 optional float params = 20 [default = 0]; 1021 optional float scale_in = 21; 1022 optional float scale_out = 22; 1023 optional int32 a_min = 23 [default = -128]; 1024 optional int32 a_max = 24 [default = 127]; 1025} 1026 1027// https://github.com/facebook/C3D/blob/master/C3D-v1.1/src/caffe/proto/caffe.proto 1028message Convolution3DParameter { 1029 optional uint32 num_output = 1; // The number of outputs for the layer 1030 optional bool bias_term = 2 [default = true]; // whether to have bias terms 1031 optional uint32 pad = 3 [default = 0]; // The padding size 1032 optional uint32 kernel_size = 4; // The kernel size 1033 optional uint32 group = 5 [default = 1]; // The group size for group conv 1034 optional uint32 kernel_depth = 6; // The kernel size 1035 optional uint32 stride = 7 [default = 1]; // The stride 1036 optional uint32 temporal_stride = 8 [default = 1]; // The stride for temporal 1037 optional FillerParameter weight_filler = 9; // The filler for the weight 1038 optional FillerParameter bias_filler = 10; // The filler for the bias 1039 optional uint32 filter_group = 11 [default = 1]; // divide filters into groups to reduce memory consumption 1040 optional uint32 temporal_pad = 12 [default = 0]; // padding size for temporal 1041} 1042 1043message CropParameter { 1044 // To crop, elements of the first bottom are selected to fit the dimensions 1045 // of the second, reference bottom. The crop is configured by 1046 // - the crop `axis` to pick the dimensions for cropping 1047 // - the crop `offset` to set the shift for all/each dimension 1048 // to align the cropped bottom with the reference bottom. 1049 // All dimensions up to but excluding `axis` are preserved, while 1050 // the dimensions including and trailing `axis` are cropped. 1051 // If only one `offset` is set, then all dimensions are offset by this amount. 
1052 // Otherwise, the number of offsets must equal the number of cropped axes to 1053 // shift the crop in each dimension accordingly. 1054 // Note: standard dimensions are N,C,H,W so the default is a spatial crop, 1055 // and `axis` may be negative to index from the end (e.g., -1 for the last 1056 // axis). 1057 optional int32 axis = 1 [default = 2]; 1058 repeated uint32 offset = 2; 1059} 1060 1061message DataParameter { 1062 enum DB { 1063 LEVELDB = 0; 1064 LMDB = 1; 1065 } 1066 // Specify the data source. 1067 optional string source = 1; 1068 // Specify the batch size. 1069 optional uint32 batch_size = 4; 1070 // The rand_skip variable is for the data layer to skip a few data points 1071 // to avoid all asynchronous sgd clients to start at the same point. The skip 1072 // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 1073 // be larger than the number of keys in the database. 1074 // DEPRECATED. Each solver accesses a different subset of the database. 1075 optional uint32 rand_skip = 7 [default = 0]; 1076 optional DB backend = 8 [default = LEVELDB]; 1077 // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 1078 // simple scaling and subtracting the data mean, if provided. Note that the 1079 // mean subtraction is always carried out before scaling. 1080 optional float scale = 2 [default = 1]; 1081 optional string mean_file = 3; 1082 // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 1083 // crop an image. 1084 optional uint32 crop_size = 5 [default = 0]; 1085 // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 1086 // data. 
1087 optional bool mirror = 6 [default = false]; 1088 // Force the encoded image to have 3 color channels 1089 optional bool force_encoded_color = 9 [default = false]; 1090 // Prefetch queue (Increase if data feeding bandwidth varies, within the 1091 // limit of device memory for GPU training) 1092 optional uint32 prefetch = 10 [default = 4]; 1093 optional uint32 label_size = 11 [default = 0]; 1094 optional uint32 data_width = 12 [default = 0]; 1095 optional bool output_width = 13 [default = false]; 1096} 1097 1098// Message that store parameters used by DetectionEvaluateLayer 1099message DetectionEvaluateParameter { 1100 // Number of classes that are actually predicted. Required! 1101 optional uint32 num_classes = 1; 1102 // Label id for background class. Needed for sanity check so that 1103 // background class is neither in the ground truth nor the detections. 1104 optional uint32 background_label_id = 2 [default = 0]; 1105 // Threshold for deciding true/false positive. 1106 optional float overlap_threshold = 3 [default = 0.5]; 1107 // If true, also consider difficult ground truth for evaluation. 1108 optional bool evaluate_difficult_gt = 4 [default = true]; 1109 // A file which contains a list of names and sizes with same order 1110 // of the input DB. The file is in the following format: 1111 // name height width 1112 // ... 1113 // If provided, we will scale the prediction and ground truth NormalizedBBox 1114 // for evaluation. 1115 optional string name_size_file = 5; 1116 // The resize parameter used in converting NormalizedBBox to original image. 1117 optional ResizeParameter resize_param = 6; 1118} 1119 1120message NonMaximumSuppressionParameter { 1121 // Threshold to be used in nms. 1122 optional float nms_threshold = 1 [default = 0.3]; 1123 // Maximum number of results to be kept. 1124 optional int32 top_k = 2; 1125 // Parameter for adaptive nms. 
1126 optional float eta = 3 [default = 1.0]; 1127} 1128 1129message SaveOutputParameter { 1130 // Output directory. If not empty, we will save the results. 1131 optional string output_directory = 1; 1132 // Output name prefix. 1133 optional string output_name_prefix = 2; 1134 // Output format. 1135 // VOC - PASCAL VOC output format. 1136 // COCO - MS COCO output format. 1137 optional string output_format = 3; 1138 // If you want to output results, must also provide the following two files. 1139 // Otherwise, we will ignore saving results. 1140 // label map file. 1141 optional string label_map_file = 4; 1142 // A file which contains a list of names and sizes with same order 1143 // of the input DB. The file is in the following format: 1144 // name height width 1145 // ... 1146 optional string name_size_file = 5; 1147 // Number of test images. It can be less than the lines specified in 1148 // name_size_file. For example, when we only want to evaluate on part 1149 // of the test images. 1150 optional uint32 num_test_image = 6; 1151 // The resize parameter used in saving the data. 1152 optional ResizeParameter resize_param = 7; 1153} 1154 1155// Message that store parameters used by DetectionOutputLayer 1156message DetectionOutputParameter { 1157 // Number of classes to be predicted. Required! 1158 optional uint32 num_classes = 1; 1159 // If true, bounding box are shared among different classes. 1160 optional bool share_location = 2 [default = true]; 1161 // Background label id. If there is no background class, 1162 // set it as -1. 1163 optional int32 background_label_id = 3 [default = 0]; 1164 // Parameters used for non maximum suppression. 1165 optional NonMaximumSuppressionParameter nms_param = 4; 1166 // Parameters used for saving detection results. 1167 optional SaveOutputParameter save_output_param = 5; 1168 // Type of coding method for bbox. 
1169 optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER]; 1170 // If true, variance is encoded in target; otherwise we need to adjust the 1171 // predicted offset accordingly. 1172 optional bool variance_encoded_in_target = 8 [default = false]; 1173 // Number of total bboxes to be kept per image after nms step. 1174 // -1 means keeping all bboxes after nms step. 1175 optional int32 keep_top_k = 7 [default = -1]; 1176 // Only consider detections whose confidences are larger than a threshold. 1177 // If not provided, consider all boxes. 1178 optional float confidence_threshold = 9; 1179 // If true, visualize the detection results. 1180 optional bool visualize = 10 [default = false]; 1181 // The threshold used to visualize the detection results. 1182 optional float visualize_threshold = 11; 1183 // If provided, save outputs to video file. 1184 optional string save_file = 12; 1185 optional float objectness_score = 24 [default = 0.01]; 1186} 1187 1188message DropoutParameter { 1189 optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio 1190} 1191 1192// DummyDataLayer fills any number of arbitrarily shaped blobs with random 1193// (or constant) data generated by "Fillers" (see "message FillerParameter"). 1194message DummyDataParameter { 1195 // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N 1196 // shape fields, and 0, 1 or N data_fillers. 1197 // 1198 // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. 1199 // If 1 data_filler is specified, it is applied to all top blobs. If N are 1200 // specified, the ith is applied to the ith top blob. 1201 repeated FillerParameter data_filler = 1; 1202 repeated BlobShape shape = 6; 1203 1204 // 4D dimensions -- deprecated. Use "shape" instead. 
1205 repeated uint32 num = 2; 1206 repeated uint32 channels = 3; 1207 repeated uint32 height = 4; 1208 repeated uint32 width = 5; 1209} 1210 1211message EltwiseParameter { 1212 enum EltwiseOp { 1213 PROD = 0; 1214 SUM = 1; 1215 MAX = 2; 1216 } 1217 optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation 1218 repeated float coeff = 2; // blob-wise coefficient for SUM operation 1219 1220 // Whether to use an asymptotically slower (for >2 inputs) but stabler method 1221 // of computing the gradient for the PROD operation. (No effect for SUM op.) 1222 optional bool stable_prod_grad = 3 [default = true]; 1223} 1224 1225// Message that stores parameters used by ELULayer 1226message ELUParameter { 1227 // Described in: 1228 // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate 1229 // Deep Network Learning by Exponential Linear Units (ELUs). arXiv 1230 optional float alpha = 1 [default = 1]; 1231} 1232 1233// Message that stores parameters used by EmbedLayer 1234message EmbedParameter { 1235 optional uint32 num_output = 1; // The number of outputs for the layer 1236 // The input is given as integers to be interpreted as one-hot 1237 // vector indices with dimension num_input. Hence num_input should be 1238 // 1 greater than the maximum possible input value. 1239 optional uint32 input_dim = 2; 1240 1241 optional bool bias_term = 3 [default = true]; // Whether to use a bias term 1242 optional FillerParameter weight_filler = 4; // The filler for the weight 1243 optional FillerParameter bias_filler = 5; // The filler for the bias 1244 1245} 1246 1247// Message that stores parameters used by ExpLayer 1248message ExpParameter { 1249 // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. 1250 // Or if base is set to the default (-1), base is set to e, 1251 // so y = exp(shift + scale * x). 
1252 optional float base = 1 [default = -1.0]; 1253 optional float scale = 2 [default = 1.0]; 1254 optional float shift = 3 [default = 0.0]; 1255} 1256 1257/// Message that stores parameters used by FlattenLayer 1258message FlattenParameter { 1259 // The first axis to flatten: all preceding axes are retained in the output. 1260 // May be negative to index from the end (e.g., -1 for the last axis). 1261 optional int32 axis = 1 [default = 1]; 1262 1263 // The last axis to flatten: all following axes are retained in the output. 1264 // May be negative to index from the end (e.g., the default -1 for the last 1265 // axis). 1266 optional int32 end_axis = 2 [default = -1]; 1267} 1268 1269// Message that stores parameters used by HDF5DataLayer 1270message HDF5DataParameter { 1271 // Specify the data source. 1272 optional string source = 1; 1273 // Specify the batch size. 1274 optional uint32 batch_size = 2; 1275 1276 // Specify whether to shuffle the data. 1277 // If shuffle == true, the ordering of the HDF5 files is shuffled, 1278 // and the ordering of data within any given HDF5 file is shuffled, 1279 // but data between different files are not interleaved; all of a file's 1280 // data are output (in a random order) before moving onto another file. 1281 optional bool shuffle = 3 [default = false]; 1282} 1283 1284message HDF5OutputParameter { 1285 optional string file_name = 1; 1286} 1287 1288message HingeLossParameter { 1289 enum Norm { 1290 L1 = 1; 1291 L2 = 2; 1292 } 1293 // Specify the Norm to use L1 or L2 1294 optional Norm norm = 1 [default = L1]; 1295} 1296 1297message ImageDataParameter { 1298 // Specify the data source. 1299 optional string source = 1; 1300 // Specify the batch size. 1301 optional uint32 batch_size = 4 [default = 1]; 1302 // The rand_skip variable is for the data layer to skip a few data points 1303 // to avoid all asynchronous sgd clients to start at the same point. The skip 1304 // point would be set as rand_skip * rand(0,1). 
Note that rand_skip should not 1305 // be larger than the number of keys in the database. 1306 optional uint32 rand_skip = 7 [default = 0]; 1307 // Whether or not ImageLayer should shuffle the list of files at every epoch. 1308 optional bool shuffle = 8 [default = false]; 1309 // It will also resize images if new_height or new_width are not zero. 1310 optional uint32 new_height = 9 [default = 0]; 1311 optional uint32 new_width = 10 [default = 0]; 1312 // Specify if the images are color or gray 1313 optional bool is_color = 11 [default = true]; 1314 // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 1315 // simple scaling and subtracting the data mean, if provided. Note that the 1316 // mean subtraction is always carried out before scaling. 1317 optional float scale = 2 [default = 1]; 1318 optional string mean_file = 3; 1319 // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 1320 // crop an image. 1321 optional uint32 crop_size = 5 [default = 0]; 1322 // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 1323 // data. 1324 optional bool mirror = 6 [default = false]; 1325 optional string root_folder = 12 [default = ""]; 1326 optional uint32 label_num = 13 [default = 1]; 1327 optional uint32 att_num = 14 [default = 1]; 1328} 1329 1330message InfogainLossParameter { 1331 // Specify the infogain matrix source. 1332 optional string source = 1; 1333 optional int32 axis = 2 [default = 1]; // axis of prob 1334} 1335 1336message InnerProductParameter { 1337 optional uint32 num_output = 1; // The number of outputs for the layer 1338 optional bool bias_term = 2 [default = true]; // whether to have bias terms 1339 optional FillerParameter weight_filler = 3; // The filler for the weight 1340 optional FillerParameter bias_filler = 4; // The filler for the bias 1341 1342 // The first axis to be lumped into a single inner product computation; 1343 // all preceding axes are retained in the output. 
1344 // May be negative to index from the end (e.g., -1 for the last axis). 1345 optional int32 axis = 5 [default = 1]; 1346 // Specify whether to transpose the weight matrix or not. 1347 // If transpose == true, any operations will be performed on the transpose 1348 // of the weight matrix. The weight matrix itself is not going to be transposed 1349 // but rather the transfer flag of operations will be toggled accordingly. 1350 optional bool transpose = 6 [default = false]; 1351 optional uint32 paramq = 7 [default = 0]; 1352 optional float params = 8 [default = 0]; 1353 optional float scale_in = 9; 1354 optional float scale_out = 10; 1355 optional int32 a_min = 11 [default = -128]; 1356 optional int32 a_max = 12 [default = 127]; 1357} 1358 1359message InputParameter { 1360 // This layer produces N >= 1 top blob(s) to be assigned manually. 1361 // Define N shapes to set a shape for each top. 1362 // Define 1 shape to set the same shape for every top. 1363 // Define no shape to defer to reshaping manually. 1364 repeated BlobShape shape = 1; 1365} 1366 1367// Message that stores parameters used by LogLayer 1368message LogParameter { 1369 // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 
1370 // Or if base is set to the default (-1), base is set to e, 1371 // so y = ln(shift + scale * x) = log_e(shift + scale * x) 1372 optional float base = 1 [default = -1.0]; 1373 optional float scale = 2 [default = 1.0]; 1374 optional float shift = 3 [default = 0.0]; 1375} 1376 1377// Message that stores parameters used by LRNLayer 1378message LRNParameter { 1379 optional uint32 local_size = 1 [default = 5]; 1380 optional float alpha = 2 [default = 1.]; 1381 optional float beta = 3 [default = 0.75]; 1382 enum NormRegion { 1383 ACROSS_CHANNELS = 0; 1384 WITHIN_CHANNEL = 1; 1385 } 1386 optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; 1387 optional float k = 5 [default = 1.]; 1388 enum Engine { 1389 DEFAULT = 0; 1390 CAFFE = 1; 1391 CUDNN = 2; 1392 } 1393 optional Engine engine = 6 [default = DEFAULT]; 1394} 1395 1396message MemoryDataParameter { 1397 optional uint32 batch_size = 1; 1398 optional uint32 channels = 2; 1399 optional uint32 height = 3; 1400 optional uint32 width = 4; 1401} 1402 1403// Message that store parameters used by MultiBoxLossLayer 1404message MultiBoxLossParameter { 1405 // Localization loss type. 1406 enum LocLossType { 1407 L2 = 0; 1408 SMOOTH_L1 = 1; 1409 } 1410 optional LocLossType loc_loss_type = 1 [default = SMOOTH_L1]; 1411 // Confidence loss type. 1412 enum ConfLossType { 1413 SOFTMAX = 0; 1414 LOGISTIC = 1; 1415 } 1416 optional ConfLossType conf_loss_type = 2 [default = SOFTMAX]; 1417 // Weight for localization loss. 1418 optional float loc_weight = 3 [default = 1.0]; 1419 // Number of classes to be predicted. Required! 1420 optional uint32 num_classes = 4; 1421 // If true, bounding box are shared among different classes. 1422 optional bool share_location = 5 [default = true]; 1423 // Matching method during training. 
1424 enum MatchType { 1425 BIPARTITE = 0; 1426 PER_PREDICTION = 1; 1427 } 1428 optional MatchType match_type = 6 [default = PER_PREDICTION]; 1429 // If match_type is PER_PREDICTION, use overlap_threshold to 1430 // determine the extra matching bboxes. 1431 optional float overlap_threshold = 7 [default = 0.5]; 1432 // Use prior for matching. 1433 optional bool use_prior_for_matching = 8 [default = true]; 1434 // Background label id. 1435 optional uint32 background_label_id = 9 [default = 0]; 1436 // If true, also consider difficult ground truth. 1437 optional bool use_difficult_gt = 10 [default = true]; 1438 // If true, perform negative mining. 1439 // DEPRECATED: use mining_type instead. 1440 optional bool do_neg_mining = 11; 1441 // The negative/positive ratio. 1442 optional float neg_pos_ratio = 12 [default = 3.0]; 1443 // The negative overlap upperbound for the unmatched predictions. 1444 optional float neg_overlap = 13 [default = 0.5]; 1445 // Type of coding method for bbox. 1446 optional PriorBoxParameter.CodeType code_type = 14 [default = CORNER]; 1447 // If true, encode the variance of prior box in the loc loss target instead of 1448 // in bbox. 1449 optional bool encode_variance_in_target = 16 [default = false]; 1450 // If true, map all object classes to agnostic class. It is useful for learning 1451 // objectness detector. 1452 optional bool map_object_to_agnostic = 17 [default = false]; 1453 // If true, ignore cross boundary bbox during matching. 1454 // Cross boundary bbox is a bbox who is outside of the image region. 1455 optional bool ignore_cross_boundary_bbox = 18 [default = false]; 1456 // If true, only backpropagate on corners which are inside of the image 1457 // region when encode_type is CORNER or CORNER_SIZE. 1458 optional bool bp_inside = 19 [default = false]; 1459 // Mining type during training. 1460 // NONE : use all negatives. 1461 // MAX_NEGATIVE : select negatives based on the score. 
1462 // HARD_EXAMPLE : select hard examples based on "Training Region-based Object Detectors with Online Hard Example Mining", Shrivastava et.al. 1463 enum MiningType { 1464 NONE = 0; 1465 MAX_NEGATIVE = 1; 1466 HARD_EXAMPLE = 2; 1467 } 1468 optional MiningType mining_type = 20 [default = MAX_NEGATIVE]; 1469 // Parameters used for non maximum suppression durig hard example mining. 1470 optional NonMaximumSuppressionParameter nms_param = 21; 1471 optional int32 sample_size = 22 [default = 64]; 1472 optional bool use_prior_for_nms = 23 [default = false]; 1473} 1474 1475message MVNParameter { 1476 // This parameter can be set to false to normalize mean only 1477 optional bool normalize_variance = 1 [default = true]; 1478 1479 // This parameter can be set to true to perform DNN-like MVN 1480 optional bool across_channels = 2 [default = false]; 1481 1482 // Epsilon for not dividing by zero while normalizing variance 1483 optional float eps = 3 [default = 1e-9]; 1484} 1485 1486// Message that stores parameters used by NormalizeLayer 1487message NormalizeParameter { 1488 optional bool across_spatial = 1 [default = true]; 1489 // Initial value of scale. Default is 1.0 for all 1490 optional FillerParameter scale_filler = 2; 1491 // Whether or not scale parameters are shared across channels. 1492 optional bool channel_shared = 3 [default = true]; 1493 // Epsilon for not dividing by zero while normalizing variance 1494 optional float eps = 4 [default = 1e-10]; 1495} 1496 1497message ParameterParameter { 1498 optional BlobShape shape = 1; 1499} 1500 1501message PermuteParameter { 1502 // The new orders of the axes of data. Notice it should be with 1503 // in the same range as the input data, and it starts from 0. 1504 // Do not provide repeated order. 
repeated uint32 order = 1;
}

message PoolingParameter {
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional bool avg_include_pad = 13 [default = true];
  // The pooling method.
  optional PoolMethod pool = 1 [default = MAX];

  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in height and width or as Y, X pairs.

  // The padding size (equal in Y, X).
  optional uint32 pad = 4 [default = 0];
  // The padding height.
  optional uint32 pad_h = 9 [default = 0];
  // The padding width.
  optional uint32 pad_w = 10 [default = 0];
  // The kernel size (square).
  optional uint32 kernel_size = 2;
  // The kernel height.
  optional uint32 kernel_h = 5;
  // The kernel width.
  optional uint32 kernel_w = 6;
  // The stride (equal in Y, X).
  optional uint32 stride = 3 [default = 1];
  // The stride height.
  optional uint32 stride_h = 7;
  // The stride width.
  optional uint32 stride_w = 8;

  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 11 [default = DEFAULT];

  // If global_pooling then it will pool over the size of the bottom by doing
  // kernel_h = bottom->height and kernel_w = bottom->width
  optional bool global_pooling = 12 [default = false];

  enum RoundMode {
    CEIL = 0;
    FLOOR = 1;
  }
  optional RoundMode round_mode = 14 [default = CEIL];
}

message Pooling3DParameter {
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  // The pooling method.
  optional PoolMethod pool = 1 [default = MAX];
  // The kernel size.
  optional uint32 kernel_size = 2;
  // The kernel depth.
  optional uint32 kernel_depth = 3;
  // The stride.
  optional uint32 stride = 4 [default = 1];
  // The temporal stride.
  optional uint32 temporal_stride = 5 [default = 1];
  optional uint32 pad = 6 [default = 0];
  optional uint32 temporal_pad = 7 [default = 0];
}

message PowerParameter {
  // PowerLayer computes outputs y = (shift + scale * x) ^ power.
  optional float power = 1 [default = 1.0];
  optional float scale = 2 [default = 1.0];
  optional float shift = 3 [default = 0.0];
}

// Message that stores parameters used by PriorBoxLayer
message PriorBoxParameter {
  // Encode/decode type.
  enum CodeType {
    CORNER = 1;
    CENTER_SIZE = 2;
    CORNER_SIZE = 3;
  }
  // Minimum box size (in pixels). Required!
  repeated float min_size = 1;
  // Maximum box size (in pixels). Required!
  repeated float max_size = 2;
  // Various aspect ratios. Duplicate ratios will be ignored.
  // If none is provided, we use default ratio 1.
  repeated float aspect_ratio = 3;
  // If true, will flip each aspect ratio.
  // For example, if there is aspect ratio "r",
  // we will generate aspect ratio "1.0/r" as well.
  optional bool flip = 4 [default = true];
  // If true, will clip the prior so that it is within [0, 1]
  optional bool clip = 5 [default = false];
  // Variance for adjusting the prior bboxes.
  repeated float variance = 6;

  // By default, we calculate img_height, img_width, step_x, step_y based on
  // bottom[0] (feat) and bottom[1] (img), unless these values are explicitly
  // provided.

  // Explicitly provide the img_size.
  optional uint32 img_size = 7;
  // Either img_size or img_h/img_w should be specified; not both.
  optional uint32 img_h = 8;
  optional uint32 img_w = 9;

  // Explicitly provide the step size.
  optional float step = 10;
  // Either step or step_h/step_w should be specified; not both.
  optional float step_h = 11;
  optional float step_w = 12;

  // Offset to the top left corner of each cell.
optional float offset = 13 [default = 0.5];
}

message PythonParameter {
  optional string module = 1;
  optional string layer = 2;
  // This value is set to the attribute `param_str` of the `PythonLayer` object
  // in Python before calling the `setup()` method. This could be a number,
  // string, dictionary in Python dict format, JSON, etc. You may parse this
  // string in the `setup` method and use it in `forward` and `backward`.
  optional string param_str = 3 [default = ""];
  // Whether this PythonLayer is shared among worker solvers during data
  // parallelism.
  // If true, each worker solver sequentially runs forward from this layer.
  // This value should be set true if you are using it as a data layer.
  optional bool share_in_parallel = 4 [default = false];
}

// Message that stores parameters used by RecurrentLayer
message RecurrentParameter {
  // The dimension of the output (and usually hidden state) representation --
  // must be explicitly set to non-zero.
  optional uint32 num_output = 1 [default = 0];

  // The filler for the weight.
  optional FillerParameter weight_filler = 2;
  // The filler for the bias.
  optional FillerParameter bias_filler = 3;

  // Whether to enable displaying debug_info in the unrolled recurrent net.
  optional bool debug_info = 4 [default = false];

  // Whether to add as additional inputs (bottoms) the initial hidden state
  // blobs, and add as additional outputs (tops) the final timestep hidden state
  // blobs. The number of additional bottom/top blobs required depends on the
  // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs.
optional bool expose_hidden = 5 [default = false];
}

// Message that stores parameters used by ReductionLayer
message ReductionParameter {
  enum ReductionOp {
    SUM = 1;
    ASUM = 2;
    SUMSQ = 3;
    MEAN = 4;
  }

  // Reduction operation.
  optional ReductionOp operation = 1 [default = SUM];

  // The first axis to reduce to a scalar -- may be negative to index from the
  // end (e.g., -1 for the last axis).
  // (Currently, only reduction along ALL "tail" axes is supported; reduction
  // of axis M through N, where N < num_axes - 1, is unsupported.)
  // Suppose we have an n-axis bottom Blob with shape:
  //     (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)).
  // If axis == m, the output Blob will have shape
  //     (d0, d1, d2, ..., d(m-1)),
  // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1))
  // times, each including (dm * d(m+1) * ... * d(n-1)) individual data.
  // If axis == 0 (the default), the output Blob always has the empty shape
  // (count 1), performing reduction across the entire input --
  // often useful for creating new loss functions.
  optional int32 axis = 2 [default = 0];

  // Coefficient for output.
  optional float coeff = 3 [default = 1.0];
}

// Message that stores parameters used by ReLULayer
message ReLUParameter {
  // Allow non-zero slope for negative inputs to speed up optimization
  // Described in:
  // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities
  // improve neural network acoustic models. In ICML Workshop on Deep Learning
  // for Audio, Speech, and Language Processing.
  optional float negative_slope = 1 [default = 0];
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 2 [default = DEFAULT];
}

message ReshapeParameter {
  // Specify the output dimensions. If some of the dimensions are set to 0,
  // the corresponding dimension from the bottom layer is used (unchanged).
  // Exactly one dimension may be set to -1, in which case its value is
  // inferred from the count of the bottom blob and the remaining dimensions.
  // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8:
  //
  //   layer {
  //     type: "Reshape" bottom: "input" top: "output"
  //     reshape_param { ... }
  //   }
  //
  // If "input" is 2D with shape 2 x 8, then the following reshape_param
  // specifications are all equivalent, producing a 3D blob "output" with shape
  // 2 x 2 x 4:
  //
  //   reshape_param { shape { dim:  2  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim:  4 } }
  //   reshape_param { shape { dim:  0  dim: 2  dim: -1 } }
  //   reshape_param { shape { dim:  0  dim:-1  dim:  4 } }
  //
  optional BlobShape shape = 1;

  // axis and num_axes control the portion of the bottom blob's shape that are
  // replaced by (included in) the reshape. By default (axis == 0 and
  // num_axes == -1), the entire bottom blob shape is included in the reshape,
  // and hence the shape field must specify the entire output shape.
  //
  // axis may be non-zero to retain some portion of the beginning of the input
  // shape (and may be negative to index from the end; e.g., -1 to begin the
  // reshape after the last axis, including nothing in the reshape,
  // -2 to include only the last axis, etc.).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are all equivalent,
  // producing a blob "output" with shape 2 x 2 x 4:
  //
  //   reshape_param { shape { dim: 2  dim: 2  dim: 4 } }
  //   reshape_param { shape { dim: 2  dim: 4 } axis:  1 }
  //   reshape_param { shape { dim: 2  dim: 4 } axis: -2 }
  //
  // num_axes specifies the extent of the reshape.
  // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on
  // input axes in the half-open range [axis, axis+num_axes); in particular,
  // num_axes == 0 replaces no input axis and only inserts the new dimensions
  // at position axis.
  // num_axes may also be -1, the default, to include all remaining axes
  // (starting from axis).
  //
  // For example, suppose "input" is a 2D blob with shape 2 x 8.
  // Then the following ReshapeLayer specifications are equivalent,
  // producing a blob "output" with shape 1 x 2 x 8.
  //
  //   reshape_param { shape { dim:  1  dim: 2  dim:  8 } }
  //   reshape_param { shape { dim:  1  dim: 2  }  num_axes: 1 }
  //   reshape_param { shape { dim:  1  }  num_axes: 0 }
  //
  // On the other hand, these would produce output blob shape 2 x 1 x 8:
  //
  //   reshape_param { shape { dim: 2  dim: 1  dim: 8  }  }
  //   reshape_param { shape { dim: 1 }  axis: 1  num_axes: 0 }
  //
  optional int32 axis = 2 [default = 0];
  optional int32 num_axes = 3 [default = -1];
}

message ScaleParameter {
  // The first axis of bottom[0] (the first input Blob) along which to apply
  // bottom[1] (the second input Blob).  May be negative to index from the end
  // (e.g., -1 for the last axis).
  //
  // For example, if bottom[0] is 4D with shape 100x3x40x60, the output
  // top[0] will have the same shape, and bottom[1] may have any of the
  // following shapes (for the given value of axis):
  //    (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60
  //    (axis == 1 == -3)          3;     3x40;     3x40x60
  //    (axis == 2 == -2)                   40;       40x60
  //    (axis == 3 == -1)                                60
  // Furthermore, bottom[1] may have the empty shape (regardless of the value of
  // "axis") -- a scalar multiplier.
  optional int32 axis = 1 [default = 1];

  // (num_axes is ignored unless just one bottom is given and the scale is
  // a learned parameter of the layer.  Otherwise, num_axes is determined by the
  // number of axes of the second bottom.)
// The number of axes of the input (bottom[0]) covered by the scale
  // parameter, or -1 to cover all axes of bottom[0] starting from `axis`.
  // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar.
  optional int32 num_axes = 2 [default = 1];

  // (filler is ignored unless just one bottom is given and the scale is
  // a learned parameter of the layer.)
  // The initialization for the learned scale parameter.
  // Default is the unit (1) initialization, resulting in the ScaleLayer
  // initially performing the identity operation.
  optional FillerParameter filler = 3;

  // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but
  // may be more efficient).  Initialized with bias_filler (defaults to 0).
  optional bool bias_term = 4 [default = false];
  optional FillerParameter bias_filler = 5;
}

message SigmoidParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];
}

message SliceParameter {
  // The axis along which to slice -- may be negative to index from the end
  // (e.g., -1 for the last axis).
  // By default, SliceLayer slices blobs along the "channels" axis (1).
  optional int32 axis = 3 [default = 1];
  repeated uint32 slice_point = 2;

  // DEPRECATED: alias for "axis" -- does not support negative indexing.
  optional uint32 slice_dim = 1 [default = 1];
}

// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer
message SoftmaxParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];

  // The axis along which to perform the softmax -- may be negative to index
  // from the end (e.g., -1 for the last axis).
  // Any other axes will be evaluated as independent softmaxes.
optional int32 axis = 2 [default = 1];
}

message TanHParameter {
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 1 [default = DEFAULT];
}

// Message that stores parameters used by TileLayer
message TileParameter {
  // The index of the axis to tile.
  optional int32 axis = 1 [default = 1];

  // The number of copies (tiles) of the blob to output.
  optional int32 tiles = 2;
}

// Message that stores parameters used by ThresholdLayer
message ThresholdParameter {
  // Strictly positive values.
  optional float threshold = 1 [default = 0];
}

message WindowDataParameter {
  // Specify the data source.
  optional string source = 1;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // Specify if we would like to randomly crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // Specify if we want to randomly mirror data.
optional bool mirror = 6 [default = false];
  // Foreground (object) overlap threshold.
  optional float fg_threshold = 7 [default = 0.5];
  // Background (non-object) overlap threshold.
  optional float bg_threshold = 8 [default = 0.5];
  // Fraction of batch that should be foreground objects.
  optional float fg_fraction = 9 [default = 0.25];
  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer).
  optional uint32 context_pad = 10 [default = 0];
  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string crop_mode = 11 [default = "warp"];
  // cache_images: will load all images in memory for faster access
  optional bool cache_images = 12 [default = false];
  // append root_folder to locate images
  optional string root_folder = 13 [default = ""];
}

message SPPParameter {
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional uint32 pyramid_height = 1;
  // The pooling method.
  optional PoolMethod pool = 2 [default = MAX];
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  optional Engine engine = 6 [default = DEFAULT];
}

// DEPRECATED: use LayerParameter.
message V1LayerParameter {
  repeated string bottom = 2;
  repeated string top = 3;
  optional string name = 4;
  repeated NetStateRule include = 32;
  repeated NetStateRule exclude = 33;

  enum LayerType {
    NONE = 0;
    ABSVAL = 35;
    ACCURACY = 1;
    ARGMAX = 30;
    BNLL = 2;
    CONCAT = 3;
    CONTRASTIVE_LOSS = 37;
    CONVOLUTION = 4;
    DATA = 5;
    DECONVOLUTION = 39;
    DROPOUT = 6;
    DUMMY_DATA = 32;
    EUCLIDEAN_LOSS = 7;
    ELTWISE = 25;
    EXP = 38;
    FLATTEN = 8;
    HDF5_DATA = 9;
    HDF5_OUTPUT = 10;
    HINGE_LOSS = 28;
    IM2COL = 11;
    IMAGE_DATA = 12;
    INFOGAIN_LOSS = 13;
    INNER_PRODUCT = 14;
    LRN = 15;
    MEMORY_DATA = 29;
    MULTINOMIAL_LOGISTIC_LOSS = 16;
    MVN = 34;
    POOLING = 17;
    POWER = 26;
    RELU = 18;
    SIGMOID = 19;
    SIGMOID_CROSS_ENTROPY_LOSS = 27;
    SILENCE = 36;
    SOFTMAX = 20;
    SOFTMAX_LOSS = 21;
    SPLIT = 22;
    SLICE = 33;
    TANH = 23;
    WINDOW_DATA = 24;
    THRESHOLD = 31;
  }
  optional LayerType type = 5;

  repeated BlobProto blobs = 6;
  repeated string param = 1001;
  repeated DimCheckMode blob_share_mode = 1002;
  enum DimCheckMode {
    STRICT = 0;
    PERMISSIVE = 1;
  }
  repeated float blobs_lr = 7;
  repeated float weight_decay = 8;
  repeated float loss_weight = 35;

  // Per-layer-type parameter messages.
  optional AccuracyParameter accuracy_param = 27;
  optional ArgMaxParameter argmax_param = 23;
  optional ConcatParameter concat_param = 9;
  optional ContrastiveLossParameter contrastive_loss_param = 40;
  optional ConvolutionParameter convolution_param = 10;
  optional DataParameter data_param = 11;
  optional DropoutParameter dropout_param = 12;
  optional DummyDataParameter dummy_data_param = 26;
  optional EltwiseParameter eltwise_param = 24;
  optional ExpParameter exp_param = 41;
  optional HDF5DataParameter hdf5_data_param = 13;
  optional HDF5OutputParameter hdf5_output_param = 14;
  optional HingeLossParameter hinge_loss_param = 29;
  optional ImageDataParameter image_data_param = 15;
  optional InfogainLossParameter infogain_loss_param = 16;
  optional InnerProductParameter inner_product_param = 17;
  optional LRNParameter lrn_param = 18;
  optional MemoryDataParameter memory_data_param = 22;
  optional MVNParameter mvn_param = 34;
  optional PoolingParameter pooling_param = 19;
  optional PowerParameter power_param = 21;
  optional ReLUParameter relu_param = 30;
  optional SigmoidParameter sigmoid_param = 38;
  optional SoftmaxParameter softmax_param = 39;
  optional SliceParameter slice_param = 31;
  optional TanHParameter tanh_param = 37;
  optional ThresholdParameter threshold_param = 25;
  optional WindowDataParameter window_data_param = 20;
  optional TransformationParameter transform_param = 36;
  optional LossParameter loss_param = 42;
  optional V0LayerParameter layer = 1;
}

// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters
// in Caffe.  We keep this message type around for legacy support.
message V0LayerParameter {
  // The layer name.
  optional string name = 1;
  // The string to specify the layer type.
  optional string type = 2;

  // Parameters to specify layers with inner products.
// The number of outputs for the layer.
  optional uint32 num_output = 3;
  // Whether to have bias terms.
  optional bool biasterm = 4 [default = true];
  // The filler for the weight.
  optional FillerParameter weight_filler = 5;
  // The filler for the bias.
  optional FillerParameter bias_filler = 6;

  // The padding size.
  optional uint32 pad = 7 [default = 0];
  // The kernel size.
  optional uint32 kernelsize = 8;
  // The group size for group conv.
  optional uint32 group = 9 [default = 1];
  // The stride.
  optional uint32 stride = 10 [default = 1];
  enum PoolMethod {
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  // The pooling method.
  optional PoolMethod pool = 11 [default = MAX];
  // Dropout ratio.
  optional float dropout_ratio = 12 [default = 0.5];

  // For local response norm.
  optional uint32 local_size = 13 [default = 5];
  // For local response norm.
  optional float alpha = 14 [default = 1.0];
  // For local response norm.
  optional float beta = 15 [default = 0.75];
  optional float k = 22 [default = 1.0];

  // For data layers, specify the data source
  optional string source = 16;
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  optional float scale = 17 [default = 1];
  optional string meanfile = 18;
  // For data layers, specify the batch size.
  optional uint32 batchsize = 19;
  // For data layers, specify if we would like to randomly crop an image.
  optional uint32 cropsize = 20 [default = 0];
  // For data layers, specify if we want to randomly mirror data.
  optional bool mirror = 21 [default = false];

  // The blobs containing the numeric parameters of the layer
  repeated BlobProto blobs = 50;
  // The ratio that is multiplied on the global learning rate. If you want to
  // set the learning ratio for one blob, you need to set it for all blobs.
  repeated float blobs_lr = 51;
  // The weight decay that is multiplied on the global weight decay.
  repeated float weight_decay = 52;

  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  optional uint32 rand_skip = 53 [default = 0];

  // Fields related to detection (det_*)
  // foreground (object) overlap threshold
  optional float det_fg_threshold = 54 [default = 0.5];
  // background (non-object) overlap threshold
  optional float det_bg_threshold = 55 [default = 0.5];
  // Fraction of batch that should be foreground objects
  optional float det_fg_fraction = 56 [default = 0.25];

  // optional bool OBSOLETE_can_clobber = 57 [default = true];

  // Amount of contextual padding to add around a window
  // (used only by the window_data_layer)
  optional uint32 det_context_pad = 58 [default = 0];

  // Mode for cropping out a detection window
  // warp: cropped window is warped to a fixed size and aspect ratio
  // square: the tightest square around the window is cropped
  optional string det_crop_mode = 59 [default = "warp"];

  // For ReshapeLayer, one needs to specify the new dimensions.
  optional int32 new_num = 60 [default = 0];
  optional int32 new_channels = 61 [default = 0];
  optional int32 new_height = 62 [default = 0];
  optional int32 new_width = 63 [default = 0];

  // Whether or not ImageLayer should shuffle the list of files at every epoch.
  // It will also resize images if new_height or new_width are not zero.
optional bool shuffle_images = 64 [default = false];

  // For ConcatLayer, one needs to specify the dimension for concatenation, and
  // the other dimensions must be the same for all the bottom blobs.
  // By default it will concatenate blobs along the channels dimension.
  optional uint32 concat_dim = 65 [default = 1];

  optional HDF5OutputParameter hdf5_output_param = 1001;
}

message PReLUParameter {
  // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers:
  // Surpassing Human-Level Performance on ImageNet Classification, 2015.

  // Initial value of a_i. Default is a_i=0.25 for all i.
  optional FillerParameter filler = 1;
  // Whether or not slope parameters are shared across channels.
  optional bool channel_shared = 2 [default = false];
}

message ShuffleChannelParameter {
  // First introduced by
  // "ShuffleNet: An Extremely Efficient Convolutional Neural Network
  // for Mobile Devices".

  // The number of groups.
  optional uint32 group = 1 [default = 1];
}

message UpsampleParameter {
  optional float scale = 1;
}