1/*M/////////////////////////////////////////////////////////////////////////////////////// 2//COPYRIGHT 3// 4//All contributions by the University of California: 5//Copyright (c) 2014, The Regents of the University of California (Regents) 6//All rights reserved. 7// 8//All other contributions: 9//Copyright (c) 2014, the respective contributors 10//All rights reserved. 11// 12//Caffe uses a shared copyright model: each contributor holds copyright over 13//their contributions to Caffe. The project versioning records all such 14//contribution and copyright details. If a contributor wants to further mark 15//their specific copyright on a particular contribution, they should indicate 16//their copyright solely in the commit message of the change when it is 17//committed. 18// 19//LICENSE 20// 21//Redistribution and use in source and binary forms, with or without 22//modification, are permitted provided that the following conditions are met: 23// 24//1. Redistributions of source code must retain the above copyright notice, this 25// list of conditions and the following disclaimer. 26//2. Redistributions in binary form must reproduce the above copyright notice, 27// this list of conditions and the following disclaimer in the documentation 28// and/or other materials provided with the distribution. 29// 30//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 31//ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 32//WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 33//DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 34//ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 35//(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 36//LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 37//ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38//(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 39//SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40// 41//CONTRIBUTION AGREEMENT 42// 43//By contributing to the BVLC/caffe repository through pull-request, comment, 44//or otherwise, the contributor releases their content to the 45//license and copyright terms herein. 46// 47//M*/ 48 49syntax = "proto2"; 50 51package opencv_caffe; 52 53// NVIDIA's Caffe feature is used to store fp16 weights, https://github.com/NVIDIA/caffe: 54// Math and storage types 55enum Type { 56 DOUBLE = 0; 57 FLOAT = 1; 58 FLOAT16 = 2; 59 INT = 3; // math not supported 60 UINT = 4; // math not supported 61} 62 63// Specifies the shape (dimensions) of a Blob. 64message BlobShape { 65 repeated int64 dim = 1 [packed = true]; 66} 67 68message BlobProto { 69 optional BlobShape shape = 7; 70 repeated float data = 5 [packed = true]; 71 repeated float diff = 6 [packed = true]; 72 repeated double double_data = 8 [packed = true]; 73 repeated double double_diff = 9 [packed = true]; 74 75 // NVIDIA's Caffe fields begin. 76 optional Type raw_data_type = 10; 77 optional bytes raw_data = 12 [packed = false]; 78 // NVIDIA's Caffe fields end. 79 80 // 4D dimensions -- deprecated. Use "shape" instead. 81 optional int32 num = 1 [default = 0]; 82 optional int32 channels = 2 [default = 0]; 83 optional int32 height = 3 [default = 0]; 84 optional int32 width = 4 [default = 0]; 85} 86 87// The BlobProtoVector is simply a way to pass multiple blobproto instances 88// around. 89message BlobProtoVector { 90 repeated BlobProto blobs = 1; 91} 92 93message PermuteParameter { 94 // The new orders of the axes of data. Notice it should be with 95 // in the same range as the input data, and it starts from 0. 96 // Do not provide repeated order. 97 repeated uint32 order = 1; 98} 99 100// Message that stores parameters used by NormalizeBBoxLayer 101message NormalizeBBoxParameter { 102 optional bool across_spatial = 1 [default = true]; 103 // Initial value of scale. Default is 1.0 for all 104 optional FillerParameter scale_filler = 2; 105 // Whether or not scale parameters are shared across channels. 106 optional bool channel_shared = 3 [default = true]; 107 // Epsilon for not dividing by zero while normalizing variance 108 optional float eps = 4 [default = 1e-10]; 109} 110 111// Message that store parameters used by PriorBoxLayer 112message PriorBoxParameter { 113 // Encode/decode type. 114 enum CodeType { 115 CORNER = 1; 116 CENTER_SIZE = 2; 117 } 118 // Minimum box size (in pixels). Required! 119 repeated float min_size = 1; 120 // Maximum box size (in pixels). Required! 121 repeated float max_size = 2; 122 // Various of aspect ratios. Duplicate ratios will be ignored. 123 // If none is provided, we use default ratio 1. 124 repeated float aspect_ratio = 3; 125 // If true, will flip each aspect ratio. 126 // For example, if there is aspect ratio "r", 127 // we will generate aspect ratio "1.0/r" as well. 128 optional bool flip = 4 [default = true]; 129 // If true, will clip the prior so that it is within [0, 1] 130 optional bool clip = 5 [default = true]; 131 // Variance for adjusting the prior bboxes. 132 repeated float variance = 6; 133 // By default, we calculate img_height, img_width, step_x, step_y based on 134 // bottom[0] (feat) and bottom[1] (img). Unless these values are explicitly 135 // provided. 136 // Explicitly provide the img_size. 137 optional uint32 img_size = 7; 138 // Either img_size or img_h/img_w should be specified; not both. 139 optional uint32 img_h = 8; 140 optional uint32 img_w = 9; 141 // Explicitly provide the step size. 142 optional float step = 10; 143 // Either step or step_h/step_w should be specified; not both. 144 optional float step_h = 11; 145 optional float step_w = 12; 146 // Offset to the top left corner of each cell. 147 optional float offset = 13 [default = 0.5]; 148 // Offset to the top corner of each cell. 149 repeated float offset_h = 14; 150 // Offset to the left corner of each cell. 151 repeated float offset_w = 15; 152 // Priox boxes width (in pixels). 153 repeated float width = 16; 154 // Priox boxes height (in pixels). 155 repeated float height = 17; 156} 157 158// Message that store parameters used by DetectionOutputLayer 159message DetectionOutputParameter { 160 // Number of classes to be predicted. Required! 161 optional uint32 num_classes = 1; 162 // If true, bounding box are shared among different classes. 163 optional bool share_location = 2 [default = true]; 164 // Background label id. If there is no background class, 165 // set it as -1. 166 optional int32 background_label_id = 3 [default = 0]; 167 // Parameters used for non maximum suppression. 168 optional NonMaximumSuppressionParameter nms_param = 4; 169 // Parameters used for saving detection results. 170 optional SaveOutputParameter save_output_param = 5; 171 // Type of coding method for bbox. 172 optional PriorBoxParameter.CodeType code_type = 6 [default = CORNER]; 173 // If true, variance is encoded in target; otherwise we need to adjust the 174 // predicted offset accordingly. 175 optional bool variance_encoded_in_target = 8 [default = false]; 176 // Number of total bboxes to be kept per image after nms step. 177 // -1 means keeping all bboxes after nms step. 178 optional int32 keep_top_k = 7 [default = -1]; 179 // Only consider detections whose confidences are larger than a threshold. 180 // If not provided, consider all boxes. 181 optional float confidence_threshold = 9; 182 // If prior boxes are normalized to [0, 1] or not. 183 optional bool normalized_bbox = 10 [default = true]; 184 // OpenCV custom parameter 185 optional bool clip = 1000 [default = false]; 186} 187 188message Datum { 189 optional int32 channels = 1; 190 optional int32 height = 2; 191 optional int32 width = 3; 192 // the actual image data, in bytes 193 optional bytes data = 4; 194 optional int32 label = 5; 195 // Optionally, the datum could also hold float data. 196 repeated float float_data = 6; 197 // If true data contains an encoded image that need to be decoded 198 optional bool encoded = 7 [default = false]; 199} 200 201message FillerParameter { 202 // The filler type. 203 optional string type = 1 [default = 'constant']; 204 optional float value = 2 [default = 0]; // the value in constant filler 205 optional float min = 3 [default = 0]; // the min value in uniform filler 206 optional float max = 4 [default = 1]; // the max value in uniform filler 207 optional float mean = 5 [default = 0]; // the mean value in Gaussian filler 208 optional float std = 6 [default = 1]; // the std value in Gaussian filler 209 // The expected number of non-zero output weights for a given input in 210 // Gaussian filler -- the default -1 means don't perform sparsification. 211 optional int32 sparse = 7 [default = -1]; 212 // Normalize the filler variance by fan_in, fan_out, or their average. 213 // Applies to 'xavier' and 'msra' fillers. 214 enum VarianceNorm { 215 FAN_IN = 0; 216 FAN_OUT = 1; 217 AVERAGE = 2; 218 } 219 optional VarianceNorm variance_norm = 8 [default = FAN_IN]; 220} 221 222message NetParameter { 223 optional string name = 1; // consider giving the network a name 224 // DEPRECATED. See InputParameter. The input blobs to the network. 225 repeated string input = 3; 226 // DEPRECATED. See InputParameter. The shape of the input blobs. 227 repeated BlobShape input_shape = 8; 228 229 // 4D input dimensions -- deprecated. Use "input_shape" instead. 230 // If specified, for each input blob there should be four 231 // values specifying the num, channels, height and width of the input blob. 232 // Thus, there should be a total of (4 * #input) numbers. 233 repeated int32 input_dim = 4; 234 235 // Whether the network will force every layer to carry out backward operation. 236 // If set False, then whether to carry out backward is determined 237 // automatically according to the net structure and learning rates. 238 optional bool force_backward = 5 [default = false]; 239 // The current "state" of the network, including the phase, level, and stage. 240 // Some layers may be included/excluded depending on this state and the states 241 // specified in the layers' include and exclude fields. 242 optional NetState state = 6; 243 244 // Print debugging information about results while running Net::Forward, 245 // Net::Backward, and Net::Update. 246 optional bool debug_info = 7 [default = false]; 247 248 // The layers that make up the net. Each of their configurations, including 249 // connectivity and behavior, is specified as a LayerParameter. 250 repeated LayerParameter layer = 100; // ID 100 so layers are printed last. 251 252 // DEPRECATED: use 'layer' instead. 253 repeated V1LayerParameter layers = 2; 254} 255 256// NOTE 257// Update the next available ID when you add a new SolverParameter field. 258// 259// SolverParameter next available ID: 41 (last added: type) 260message SolverParameter { 261 ////////////////////////////////////////////////////////////////////////////// 262 // Specifying the train and test networks 263 // 264 // Exactly one train net must be specified using one of the following fields: 265 // train_net_param, train_net, net_param, net 266 // One or more test nets may be specified using any of the following fields: 267 // test_net_param, test_net, net_param, net 268 // If more than one test net field is specified (e.g., both net and 269 // test_net are specified), they will be evaluated in the field order given 270 // above: (1) test_net_param, (2) test_net, (3) net_param/net. 271 // A test_iter must be specified for each test_net. 272 // A test_level and/or a test_stage may also be specified for each test_net. 273 ////////////////////////////////////////////////////////////////////////////// 274 275 // Proto filename for the train net, possibly combined with one or more 276 // test nets. 277 optional string net = 24; 278 // Inline train net param, possibly combined with one or more test nets. 279 optional NetParameter net_param = 25; 280 281 optional string train_net = 1; // Proto filename for the train net. 282 repeated string test_net = 2; // Proto filenames for the test nets. 283 optional NetParameter train_net_param = 21; // Inline train net params. 284 repeated NetParameter test_net_param = 22; // Inline test net params. 285 286 // The states for the train/test nets. Must be unspecified or 287 // specified once per net. 288 // 289 // By default, all states will have solver = true; 290 // train_state will have phase = TRAIN, 291 // and all test_state's will have phase = TEST. 292 // Other defaults are set according to the NetState defaults. 293 optional NetState train_state = 26; 294 repeated NetState test_state = 27; 295 296 // The number of iterations for each test net. 297 repeated int32 test_iter = 3; 298 299 // The number of iterations between two testing phases. 300 optional int32 test_interval = 4 [default = 0]; 301 optional bool test_compute_loss = 19 [default = false]; 302 // If true, run an initial test pass before the first iteration, 303 // ensuring memory availability and printing the starting value of the loss. 304 optional bool test_initialization = 32 [default = true]; 305 optional float base_lr = 5; // The base learning rate 306 // the number of iterations between displaying info. If display = 0, no info 307 // will be displayed. 308 optional int32 display = 6; 309 // Display the loss averaged over the last average_loss iterations 310 optional int32 average_loss = 33 [default = 1]; 311 optional int32 max_iter = 7; // the maximum number of iterations 312 // accumulate gradients over `iter_size` x `batch_size` instances 313 optional int32 iter_size = 36 [default = 1]; 314 315 // The learning rate decay policy. The currently implemented learning rate 316 // policies are as follows: 317 // - fixed: always return base_lr. 318 // - step: return base_lr * gamma ^ (floor(iter / step)) 319 // - exp: return base_lr * gamma ^ iter 320 // - inv: return base_lr * (1 + gamma * iter) ^ (- power) 321 // - multistep: similar to step but it allows non uniform steps defined by 322 // stepvalue 323 // - poly: the effective learning rate follows a polynomial decay, to be 324 // zero by the max_iter. return base_lr (1 - iter/max_iter) ^ (power) 325 // - sigmoid: the effective learning rate follows a sigmod decay 326 // return base_lr ( 1/(1 + exp(-gamma * (iter - stepsize)))) 327 // 328 // where base_lr, max_iter, gamma, step, stepvalue and power are defined 329 // in the solver parameter protocol buffer, and iter is the current iteration. 330 optional string lr_policy = 8; 331 optional float gamma = 9; // The parameter to compute the learning rate. 332 optional float power = 10; // The parameter to compute the learning rate. 333 optional float momentum = 11; // The momentum value. 334 optional float weight_decay = 12; // The weight decay. 335 // regularization types supported: L1 and L2 336 // controlled by weight_decay 337 optional string regularization_type = 29 [default = "L2"]; 338 // the stepsize for learning rate policy "step" 339 optional int32 stepsize = 13; 340 // the stepsize for learning rate policy "multistep" 341 repeated int32 stepvalue = 34; 342 343 // Set clip_gradients to >= 0 to clip parameter gradients to that L2 norm, 344 // whenever their actual L2 norm is larger. 345 optional float clip_gradients = 35 [default = -1]; 346 347 optional int32 snapshot = 14 [default = 0]; // The snapshot interval 348 optional string snapshot_prefix = 15; // The prefix for the snapshot. 349 // whether to snapshot diff in the results or not. Snapshotting diff will help 350 // debugging but the final protocol buffer size will be much larger. 351 optional bool snapshot_diff = 16 [default = false]; 352 enum SnapshotFormat { 353 HDF5 = 0; 354 BINARYPROTO = 1; 355 } 356 optional SnapshotFormat snapshot_format = 37 [default = BINARYPROTO]; 357 // the mode solver will use: 0 for CPU and 1 for GPU. Use GPU in default. 358 enum SolverMode { 359 CPU = 0; 360 GPU = 1; 361 } 362 optional SolverMode solver_mode = 17 [default = GPU]; 363 // the device_id will that be used in GPU mode. Use device_id = 0 in default. 364 optional int32 device_id = 18 [default = 0]; 365 // If non-negative, the seed with which the Solver will initialize the Caffe 366 // random number generator -- useful for reproducible results. Otherwise, 367 // (and by default) initialize using a seed derived from the system clock. 368 optional int64 random_seed = 20 [default = -1]; 369 370 // type of the solver 371 optional string type = 40 [default = "SGD"]; 372 373 // numerical stability for RMSProp, AdaGrad and AdaDelta and Adam 374 optional float delta = 31 [default = 1e-8]; 375 // parameters for the Adam solver 376 optional float momentum2 = 39 [default = 0.999]; 377 378 // RMSProp decay value 379 // MeanSquare(t) = rms_decay*MeanSquare(t-1) + (1-rms_decay)*SquareGradient(t) 380 optional float rms_decay = 38 [default = 0.99]; 381 382 // If true, print information about the state of the net that may help with 383 // debugging learning problems. 384 optional bool debug_info = 23 [default = false]; 385 386 // If false, don't save a snapshot after training finishes. 387 optional bool snapshot_after_train = 28 [default = true]; 388 389 // DEPRECATED: old solver enum types, use string instead 390 enum SolverType { 391 SGD = 0; 392 NESTEROV = 1; 393 ADAGRAD = 2; 394 RMSPROP = 3; 395 ADADELTA = 4; 396 ADAM = 5; 397 } 398 // DEPRECATED: use type instead of solver_type 399 optional SolverType solver_type = 30 [default = SGD]; 400} 401 402// A message that stores the solver snapshots 403message SolverState { 404 optional int32 iter = 1; // The current iteration 405 optional string learned_net = 2; // The file that stores the learned net. 406 repeated BlobProto history = 3; // The history for sgd solvers 407 optional int32 current_step = 4 [default = 0]; // The current step for learning rate 408} 409 410enum Phase { 411 TRAIN = 0; 412 TEST = 1; 413} 414 415message NetState { 416 optional Phase phase = 1 [default = TEST]; 417 optional int32 level = 2 [default = 0]; 418 repeated string stage = 3; 419} 420 421message NetStateRule { 422 // Set phase to require the NetState have a particular phase (TRAIN or TEST) 423 // to meet this rule. 424 optional Phase phase = 1; 425 426 // Set the minimum and/or maximum levels in which the layer should be used. 427 // Leave undefined to meet the rule regardless of level. 428 optional int32 min_level = 2; 429 optional int32 max_level = 3; 430 431 // Customizable sets of stages to include or exclude. 432 // The net must have ALL of the specified stages and NONE of the specified 433 // "not_stage"s to meet the rule. 434 // (Use multiple NetStateRules to specify conjunctions of stages.) 435 repeated string stage = 4; 436 repeated string not_stage = 5; 437} 438 439// Specifies training parameters (multipliers on global learning constants, 440// and the name and other settings used for weight sharing). 441message ParamSpec { 442 // The names of the parameter blobs -- useful for sharing parameters among 443 // layers, but never required otherwise. To share a parameter between two 444 // layers, give it a (non-empty) name. 445 optional string name = 1; 446 447 // Whether to require shared weights to have the same shape, or just the same 448 // count -- defaults to STRICT if unspecified. 449 optional DimCheckMode share_mode = 2; 450 enum DimCheckMode { 451 // STRICT (default) requires that num, channels, height, width each match. 452 STRICT = 0; 453 // PERMISSIVE requires only the count (num*channels*height*width) to match. 454 PERMISSIVE = 1; 455 } 456 457 // The multiplier on the global learning rate for this parameter. 458 optional float lr_mult = 3 [default = 1.0]; 459 460 // The multiplier on the global weight decay for this parameter. 461 optional float decay_mult = 4 [default = 1.0]; 462} 463 464// NOTE 465// Update the next available ID when you add a new LayerParameter field. 466// 467// LayerParameter next available layer-specific ID: 147 (last added: recurrent_param) 468message LayerParameter { 469 optional string name = 1; // the layer name 470 optional string type = 2; // the layer type 471 repeated string bottom = 3; // the name of each bottom blob 472 repeated string top = 4; // the name of each top blob 473 474 // The train / test phase for computation. 475 optional Phase phase = 10; 476 477 // The amount of weight to assign each top blob in the objective. 478 // Each layer assigns a default value, usually of either 0 or 1, 479 // to each top blob. 480 repeated float loss_weight = 5; 481 482 // Specifies training parameters (multipliers on global learning constants, 483 // and the name and other settings used for weight sharing). 484 repeated ParamSpec param = 6; 485 486 // The blobs containing the numeric parameters of the layer. 487 repeated BlobProto blobs = 7; 488 489 // Specifies whether to backpropagate to each bottom. If unspecified, 490 // Caffe will automatically infer whether each input needs backpropagation 491 // to compute parameter gradients. If set to true for some inputs, 492 // backpropagation to those inputs is forced; if set false for some inputs, 493 // backpropagation to those inputs is skipped. 494 // 495 // The size must be either 0 or equal to the number of bottoms. 496 repeated bool propagate_down = 11; 497 498 // Rules controlling whether and when a layer is included in the network, 499 // based on the current NetState. You may specify a non-zero number of rules 500 // to include OR exclude, but not both. If no include or exclude rules are 501 // specified, the layer is always included. If the current NetState meets 502 // ANY (i.e., one or more) of the specified rules, the layer is 503 // included/excluded. 504 repeated NetStateRule include = 8; 505 repeated NetStateRule exclude = 9; 506 507 // Parameters for data pre-processing. 508 optional TransformationParameter transform_param = 100; 509 510 // Parameters shared by loss layers. 511 optional LossParameter loss_param = 101; 512 513 // Layer type-specific parameters. 514 // 515 // Note: certain layers may have more than one computational engine 516 // for their implementation. These layers include an Engine type and 517 // engine parameter for selecting the implementation. 518 // The default for the engine is set by the ENGINE switch at compile-time. 519 optional AccuracyParameter accuracy_param = 102; 520 optional ArgMaxParameter argmax_param = 103; 521 optional BatchNormParameter batch_norm_param = 139; 522 optional BiasParameter bias_param = 141; 523 optional ConcatParameter concat_param = 104; 524 optional ContrastiveLossParameter contrastive_loss_param = 105; 525 optional ConvolutionParameter convolution_param = 106; 526 optional CropParameter crop_param = 144; 527 optional DataParameter data_param = 107; 528 optional DetectionOutputParameter detection_output_param = 147; 529 optional DropoutParameter dropout_param = 108; 530 optional DummyDataParameter dummy_data_param = 109; 531 optional EltwiseParameter eltwise_param = 110; 532 optional ELUParameter elu_param = 140; 533 optional EmbedParameter embed_param = 137; 534 optional ExpParameter exp_param = 111; 535 optional FlattenParameter flatten_param = 135; 536 optional HDF5DataParameter hdf5_data_param = 112; 537 optional HDF5OutputParameter hdf5_output_param = 113; 538 optional HingeLossParameter hinge_loss_param = 114; 539 optional ImageDataParameter image_data_param = 115; 540 optional InfogainLossParameter infogain_loss_param = 116; 541 optional InnerProductParameter inner_product_param = 117; 542 optional InputParameter input_param = 143; 543 optional LogParameter log_param = 134; 544 optional LRNParameter lrn_param = 118; 545 optional MemoryDataParameter memory_data_param = 119; 546 optional MVNParameter mvn_param = 120; 547 optional NormalizeBBoxParameter norm_param = 149; 548 optional PermuteParameter permute_param = 148; 549 optional ParameterParameter parameter_param = 145; 550 optional PoolingParameter pooling_param = 121; 551 optional PowerParameter power_param = 122; 552 optional PReLUParameter prelu_param = 131; 553 optional PriorBoxParameter prior_box_param = 150; 554 optional ProposalParameter proposal_param = 201; 555 optional PSROIPoolingParameter psroi_pooling_param = 10002; // https://github.com/daijifeng001/caffe-rfcn 556 optional PythonParameter python_param = 130; 557 optional RecurrentParameter recurrent_param = 146; 558 optional ReductionParameter reduction_param = 136; 559 optional ReLUParameter relu_param = 123; 560 optional ReshapeParameter reshape_param = 133; 561 optional ROIPoolingParameter roi_pooling_param = 8266711; // https://github.com/rbgirshick/caffe-fast-rcnn/tree/fast-rcnn 562 optional ScaleParameter scale_param = 142; 563 optional SigmoidParameter sigmoid_param = 124; 564 optional SoftmaxParameter softmax_param = 125; 565 optional SPPParameter spp_param = 132; 566 optional SliceParameter slice_param = 126; 567 optional TanHParameter tanh_param = 127; 568 optional ThresholdParameter threshold_param = 128; 569 optional TileParameter tile_param = 138; 570 optional WindowDataParameter window_data_param = 129; 571} 572 573// Message that stores parameters used to apply transformation 574// to the data layer's data 575message TransformationParameter { 576 // For data pre-processing, we can do simple scaling and subtracting the 577 // data mean, if provided. Note that the mean subtraction is always carried 578 // out before scaling. 579 optional float scale = 1 [default = 1]; 580 // Specify if we want to randomly mirror data. 581 optional bool mirror = 2 [default = false]; 582 // Specify if we would like to randomly crop an image. 583 optional uint32 crop_size = 3 [default = 0]; 584 // mean_file and mean_value cannot be specified at the same time 585 optional string mean_file = 4; 586 // if specified can be repeated once (would subtract it from all the channels) 587 // or can be repeated the same number of times as channels 588 // (would subtract them from the corresponding channel) 589 repeated float mean_value = 5; 590 // Force the decoded image to have 3 color channels. 591 optional bool force_color = 6 [default = false]; 592 // Force the decoded image to have 1 color channels. 593 optional bool force_gray = 7 [default = false]; 594} 595 596// Message that stores parameters shared by loss layers 597message LossParameter { 598 // If specified, ignore instances with the given label. 599 optional int32 ignore_label = 1; 600 // How to normalize the loss for loss layers that aggregate across batches, 601 // spatial dimensions, or other dimensions. Currently only implemented in 602 // SoftmaxWithLoss and SigmoidCrossEntropyLoss layers. 603 enum NormalizationMode { 604 // Divide by the number of examples in the batch times spatial dimensions. 605 // Outputs that receive the ignore label will NOT be ignored in computing 606 // the normalization factor. 607 FULL = 0; 608 // Divide by the total number of output locations that do not take the 609 // ignore_label. If ignore_label is not set, this behaves like FULL. 610 VALID = 1; 611 // Divide by the batch size. 612 BATCH_SIZE = 2; 613 // Do not normalize the loss. 614 NONE = 3; 615 } 616 // For historical reasons, the default normalization for 617 // SigmoidCrossEntropyLoss is BATCH_SIZE and *not* VALID. 618 optional NormalizationMode normalization = 3 [default = VALID]; 619 // Deprecated. Ignored if normalization is specified. If normalization 620 // is not specified, then setting this to false will be equivalent to 621 // normalization = BATCH_SIZE to be consistent with previous behavior. 622 optional bool normalize = 2; 623} 624 625// Messages that store parameters used by individual layer types follow, in 626// alphabetical order. 627 628message AccuracyParameter { 629 // When computing accuracy, count as correct by comparing the true label to 630 // the top k scoring classes. By default, only compare to the top scoring 631 // class (i.e. argmax). 632 optional uint32 top_k = 1 [default = 1]; 633 634 // The "label" axis of the prediction blob, whose argmax corresponds to the 635 // predicted label -- may be negative to index from the end (e.g., -1 for the 636 // last axis). For example, if axis == 1 and the predictions are 637 // (N x C x H x W), the label blob is expected to contain N*H*W ground truth 638 // labels with integer values in {0, 1, ..., C-1}. 639 optional int32 axis = 2 [default = 1]; 640 641 // If specified, ignore instances with the given label. 642 optional int32 ignore_label = 3; 643} 644 645message ArgMaxParameter { 646 // If true produce pairs (argmax, maxval) 647 optional bool out_max_val = 1 [default = false]; 648 optional uint32 top_k = 2 [default = 1]; 649 // The axis along which to maximise -- may be negative to index from the 650 // end (e.g., -1 for the last axis). 651 // By default ArgMaxLayer maximizes over the flattened trailing dimensions 652 // for each index of the first / num dimension. 653 optional int32 axis = 3; 654} 655 656message ConcatParameter { 657 // The axis along which to concatenate -- may be negative to index from the 658 // end (e.g., -1 for the last axis). Other axes must have the 659 // same dimension for all the bottom blobs. 660 // By default, ConcatLayer concatenates blobs along the "channels" axis (1). 661 optional int32 axis = 2 [default = 1]; 662 663 // DEPRECATED: alias for "axis" -- does not support negative indexing. 664 optional uint32 concat_dim = 1 [default = 1]; 665} 666 667message BatchNormParameter { 668 // If false, accumulate global mean/variance values via a moving average. If 669 // true, use those accumulated values instead of computing mean/variance 670 // across the batch. 671 optional bool use_global_stats = 1; 672 // How much does the moving average decay each iteration? 673 optional float moving_average_fraction = 2 [default = .999]; 674 // Small value to add to the variance estimate so that we don't divide by 675 // zero. 676 optional float eps = 3 [default = 1e-5]; 677 // It true, scale and add biases. Source: https://github.com/NVIDIA/caffe/ 678 optional bool scale_bias = 7 [default = false]; 679} 680 681message BiasParameter { 682 // The first axis of bottom[0] (the first input Blob) along which to apply 683 // bottom[1] (the second input Blob). May be negative to index from the end 684 // (e.g., -1 for the last axis). 685 // 686 // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 687 // top[0] will have the same shape, and bottom[1] may have any of the 688 // following shapes (for the given value of axis): 689 // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 690 // (axis == 1 == -3) 3; 3x40; 3x40x60 691 // (axis == 2 == -2) 40; 40x60 692 // (axis == 3 == -1) 60 693 // Furthermore, bottom[1] may have the empty shape (regardless of the value of 694 // "axis") -- a scalar bias. 695 optional int32 axis = 1 [default = 1]; 696 697 // (num_axes is ignored unless just one bottom is given and the bias is 698 // a learned parameter of the layer. Otherwise, num_axes is determined by the 699 // number of axes by the second bottom.) 700 // The number of axes of the input (bottom[0]) covered by the bias 701 // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 702 // Set num_axes := 0, to add a zero-axis Blob: a scalar. 703 optional int32 num_axes = 2 [default = 1]; 704 705 // (filler is ignored unless just one bottom is given and the bias is 706 // a learned parameter of the layer.) 707 // The initialization for the learned bias parameter. 708 // Default is the zero (0) initialization, resulting in the BiasLayer 709 // initially performing the identity operation. 710 optional FillerParameter filler = 3; 711} 712 713message ContrastiveLossParameter { 714 // margin for dissimilar pair 715 optional float margin = 1 [default = 1.0]; 716 // The first implementation of this cost did not exactly match the cost of 717 // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. 718 // legacy_version = false (the default) uses (margin - d)^2 as proposed in the 719 // Hadsell paper. New models should probably use this version. 720 // legacy_version = true uses (margin - d^2). This is kept to support / 721 // reproduce existing models and results 722 optional bool legacy_version = 2 [default = false]; 723} 724 725message ConvolutionParameter { 726 optional uint32 num_output = 1; // The number of outputs for the layer 727 optional bool bias_term = 2 [default = true]; // whether to have bias terms 728 729 // Pad, kernel size, and stride are all given as a single value for equal 730 // dimensions in all spatial dimensions, or once per spatial dimension. 731 repeated uint32 pad = 3; // The padding size; defaults to 0 732 repeated uint32 kernel_size = 4; // The kernel size 733 repeated uint32 stride = 6; // The stride; defaults to 1 734 // Factor used to dilate the kernel, (implicitly) zero-filling the resulting 735 // holes. (Kernel dilation is sometimes referred to by its use in the 736 // algorithme à trous from Holschneider et al. 1987.) 737 repeated uint32 dilation = 18; // The dilation; defaults to 1 738 739 // For 2D convolution only, the *_h and *_w versions may also be used to 740 // specify both spatial dimensions. 741 optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) 742 optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) 743 optional uint32 kernel_h = 11; // The kernel height (2D only) 744 optional uint32 kernel_w = 12; // The kernel width (2D only) 745 optional uint32 stride_h = 13; // The stride height (2D only) 746 optional uint32 stride_w = 14; // The stride width (2D only) 747 748 optional uint32 group = 5 [default = 1]; // The group size for group conv 749 750 optional FillerParameter weight_filler = 7; // The filler for the weight 751 optional FillerParameter bias_filler = 8; // The filler for the bias 752 enum Engine { 753 DEFAULT = 0; 754 CAFFE = 1; 755 CUDNN = 2; 756 } 757 optional Engine engine = 15 [default = DEFAULT]; 758 759 // The axis to interpret as "channels" when performing convolution. 760 // Preceding dimensions are treated as independent inputs; 761 // succeeding dimensions are treated as "spatial". 762 // With (N, C, H, W) inputs, and axis == 1 (the default), we perform 763 // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for 764 // groups g>1) filters across the spatial axes (H, W) of the input. 765 // With (N, C, D, H, W) inputs, and axis == 1, we perform 766 // N independent 3D convolutions, sliding (C/g)-channels 767 // filters across the spatial axes (D, H, W) of the input. 768 optional int32 axis = 16 [default = 1]; 769 770 // Whether to force use of the general ND convolution, even if a specific 771 // implementation for blobs of the appropriate number of spatial dimensions 772 // is available. (Currently, there is only a 2D-specific convolution 773 // implementation; for input blobs with num_axes != 2, this option is 774 // ignored and the ND implementation will be used.) 775 optional bool force_nd_im2col = 17 [default = false]; 776} 777 778message CropParameter { 779 // To crop, elements of the first bottom are selected to fit the dimensions 780 // of the second, reference bottom. The crop is configured by 781 // - the crop `axis` to pick the dimensions for cropping 782 // - the crop `offset` to set the shift for all/each dimension 783 // to align the cropped bottom with the reference bottom. 784 // All dimensions up to but excluding `axis` are preserved, while 785 // the dimensions including and trailing `axis` are cropped. 786 // If only one `offset` is set, then all dimensions are offset by this amount. 787 // Otherwise, the number of offsets must equal the number of cropped axes to 788 // shift the crop in each dimension accordingly. 789 // Note: standard dimensions are N,C,H,W so the default is a spatial crop, 790 // and `axis` may be negative to index from the end (e.g., -1 for the last 791 // axis). 792 optional int32 axis = 1 [default = 2]; 793 repeated uint32 offset = 2; 794} 795 796message DataParameter { 797 enum DB { 798 LEVELDB = 0; 799 LMDB = 1; 800 } 801 // Specify the data source. 802 optional string source = 1; 803 // Specify the batch size. 804 optional uint32 batch_size = 4; 805 // The rand_skip variable is for the data layer to skip a few data points 806 // to avoid all asynchronous sgd clients to start at the same point. The skip 807 // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 808 // be larger than the number of keys in the database. 809 // DEPRECATED. Each solver accesses a different subset of the database. 810 optional uint32 rand_skip = 7 [default = 0]; 811 optional DB backend = 8 [default = LEVELDB]; 812 // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 813 // simple scaling and subtracting the data mean, if provided. Note that the 814 // mean subtraction is always carried out before scaling. 815 optional float scale = 2 [default = 1]; 816 optional string mean_file = 3; 817 // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 818 // crop an image. 819 optional uint32 crop_size = 5 [default = 0]; 820 // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 821 // data. 822 optional bool mirror = 6 [default = false]; 823 // Force the encoded image to have 3 color channels 824 optional bool force_encoded_color = 9 [default = false]; 825 // Prefetch queue (Number of batches to prefetch to host memory, increase if 826 // data access bandwidth varies). 827 optional uint32 prefetch = 10 [default = 4]; 828} 829 830message NonMaximumSuppressionParameter { 831 // Threshold to be used in nms. 832 optional float nms_threshold = 1 [default = 0.3]; 833 // Maximum number of results to be kept. 834 optional int32 top_k = 2; 835 // Parameter for adaptive nms. 836 optional float eta = 3 [default = 1.0]; 837} 838 839message SaveOutputParameter { 840 // Output directory. If not empty, we will save the results. 841 optional string output_directory = 1; 842 // Output name prefix. 843 optional string output_name_prefix = 2; 844 // Output format. 845 // VOC - PASCAL VOC output format. 846 // COCO - MS COCO output format. 847 optional string output_format = 3; 848 // If you want to output results, must also provide the following two files. 849 // Otherwise, we will ignore saving results. 850 // label map file. 851 optional string label_map_file = 4; 852 // A file which contains a list of names and sizes with same order 853 // of the input DB. The file is in the following format: 854 // name height width 855 // ... 856 optional string name_size_file = 5; 857 // Number of test images. It can be less than the lines specified in 858 // name_size_file. For example, when we only want to evaluate on part 859 // of the test images. 860 optional uint32 num_test_image = 6; 861} 862 863message DropoutParameter { 864 optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio 865 // Faster-RCNN framework's parameter. 866 // source: https://github.com/rbgirshick/caffe-fast-rcnn/tree/faster-rcnn 867 optional bool scale_train = 2 [default = true]; // scale train or test phase 868} 869 870// DummyDataLayer fills any number of arbitrarily shaped blobs with random 871// (or constant) data generated by "Fillers" (see "message FillerParameter"). 872message DummyDataParameter { 873 // This layer produces N >= 1 top blobs. DummyDataParameter must specify 1 or N 874 // shape fields, and 0, 1 or N data_fillers. 875 // 876 // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. 877 // If 1 data_filler is specified, it is applied to all top blobs. If N are 878 // specified, the ith is applied to the ith top blob. 879 repeated FillerParameter data_filler = 1; 880 repeated BlobShape shape = 6; 881 882 // 4D dimensions -- deprecated. Use "shape" instead. 883 repeated uint32 num = 2; 884 repeated uint32 channels = 3; 885 repeated uint32 height = 4; 886 repeated uint32 width = 5; 887} 888 889message EltwiseParameter { 890 enum EltwiseOp { 891 PROD = 0; 892 SUM = 1; 893 MAX = 2; 894 } 895 optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation 896 repeated float coeff = 2; // blob-wise coefficient for SUM operation 897 898 // Whether to use an asymptotically slower (for >2 inputs) but stabler method 899 // of computing the gradient for the PROD operation. (No effect for SUM op.) 900 optional bool stable_prod_grad = 3 [default = true]; 901} 902 903// Message that stores parameters used by ELULayer 904message ELUParameter { 905 // Described in: 906 // Clevert, D.-A., Unterthiner, T., & Hochreiter, S. (2015). Fast and Accurate 907 // Deep Network Learning by Exponential Linear Units (ELUs). arXiv 908 optional float alpha = 1 [default = 1]; 909} 910 911// Message that stores parameters used by EmbedLayer 912message EmbedParameter { 913 optional uint32 num_output = 1; // The number of outputs for the layer 914 // The input is given as integers to be interpreted as one-hot 915 // vector indices with dimension num_input. Hence num_input should be 916 // 1 greater than the maximum possible input value. 917 optional uint32 input_dim = 2; 918 919 optional bool bias_term = 3 [default = true]; // Whether to use a bias term 920 optional FillerParameter weight_filler = 4; // The filler for the weight 921 optional FillerParameter bias_filler = 5; // The filler for the bias 922 923} 924 925// Message that stores parameters used by ExpLayer 926message ExpParameter { 927 // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. 928 // Or if base is set to the default (-1), base is set to e, 929 // so y = exp(shift + scale * x). 930 optional float base = 1 [default = -1.0]; 931 optional float scale = 2 [default = 1.0]; 932 optional float shift = 3 [default = 0.0]; 933} 934 935/// Message that stores parameters used by FlattenLayer 936message FlattenParameter { 937 // The first axis to flatten: all preceding axes are retained in the output. 938 // May be negative to index from the end (e.g., -1 for the last axis). 939 optional int32 axis = 1 [default = 1]; 940 941 // The last axis to flatten: all following axes are retained in the output. 942 // May be negative to index from the end (e.g., the default -1 for the last 943 // axis). 944 optional int32 end_axis = 2 [default = -1]; 945} 946 947// Message that stores parameters used by HDF5DataLayer 948message HDF5DataParameter { 949 // Specify the data source. 950 optional string source = 1; 951 // Specify the batch size. 952 optional uint32 batch_size = 2; 953 954 // Specify whether to shuffle the data. 955 // If shuffle == true, the ordering of the HDF5 files is shuffled, 956 // and the ordering of data within any given HDF5 file is shuffled, 957 // but data between different files are not interleaved; all of a file's 958 // data are output (in a random order) before moving onto another file. 959 optional bool shuffle = 3 [default = false]; 960} 961 962message HDF5OutputParameter { 963 optional string file_name = 1; 964} 965 966message HingeLossParameter { 967 enum Norm { 968 L1 = 1; 969 L2 = 2; 970 } 971 // Specify the Norm to use L1 or L2 972 optional Norm norm = 1 [default = L1]; 973} 974 975message ImageDataParameter { 976 // Specify the data source. 977 optional string source = 1; 978 // Specify the batch size. 979 optional uint32 batch_size = 4 [default = 1]; 980 // The rand_skip variable is for the data layer to skip a few data points 981 // to avoid all asynchronous sgd clients to start at the same point. The skip 982 // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 983 // be larger than the number of keys in the database. 984 optional uint32 rand_skip = 7 [default = 0]; 985 // Whether or not ImageLayer should shuffle the list of files at every epoch. 986 optional bool shuffle = 8 [default = false]; 987 // It will also resize images if new_height or new_width are not zero. 988 optional uint32 new_height = 9 [default = 0]; 989 optional uint32 new_width = 10 [default = 0]; 990 // Specify if the images are color or gray 991 optional bool is_color = 11 [default = true]; 992 // DEPRECATED. See TransformationParameter. For data pre-processing, we can do 993 // simple scaling and subtracting the data mean, if provided. Note that the 994 // mean subtraction is always carried out before scaling. 995 optional float scale = 2 [default = 1]; 996 optional string mean_file = 3; 997 // DEPRECATED. See TransformationParameter. Specify if we would like to randomly 998 // crop an image. 999 optional uint32 crop_size = 5 [default = 0]; 1000 // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror 1001 // data. 1002 optional bool mirror = 6 [default = false]; 1003 optional string root_folder = 12 [default = ""]; 1004} 1005 1006message InfogainLossParameter { 1007 // Specify the infogain matrix source. 1008 optional string source = 1; 1009} 1010 1011message InnerProductParameter { 1012 optional uint32 num_output = 1; // The number of outputs for the layer 1013 optional bool bias_term = 2 [default = true]; // whether to have bias terms 1014 optional FillerParameter weight_filler = 3; // The filler for the weight 1015 optional FillerParameter bias_filler = 4; // The filler for the bias 1016 1017 // The first axis to be lumped into a single inner product computation; 1018 // all preceding axes are retained in the output. 1019 // May be negative to index from the end (e.g., -1 for the last axis). 1020 optional int32 axis = 5 [default = 1]; 1021 // Specify whether to transpose the weight matrix or not. 1022 // If transpose == true, any operations will be performed on the transpose 1023 // of the weight matrix. The weight matrix itself is not going to be transposed 1024 // but rather the transfer flag of operations will be toggled accordingly. 1025 optional bool transpose = 6 [default = false]; 1026} 1027 1028message InputParameter { 1029 // This layer produces N >= 1 top blob(s) to be assigned manually. 1030 // Define N shapes to set a shape for each top. 1031 // Define 1 shape to set the same shape for every top. 1032 // Define no shape to defer to reshaping manually. 1033 repeated BlobShape shape = 1; 1034} 1035 1036// Message that stores parameters used by LogLayer 1037message LogParameter { 1038 // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 1039 // Or if base is set to the default (-1), base is set to e, 1040 // so y = ln(shift + scale * x) = log_e(shift + scale * x) 1041 optional float base = 1 [default = -1.0]; 1042 optional float scale = 2 [default = 1.0]; 1043 optional float shift = 3 [default = 0.0]; 1044} 1045 1046// Message that stores parameters used by LRNLayer 1047message LRNParameter { 1048 optional uint32 local_size = 1 [default = 5]; 1049 optional float alpha = 2 [default = 1.]; 1050 optional float beta = 3 [default = 0.75]; 1051 enum NormRegion { 1052 ACROSS_CHANNELS = 0; 1053 WITHIN_CHANNEL = 1; 1054 } 1055 optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; 1056 optional float k = 5 [default = 1.]; 1057 enum Engine { 1058 DEFAULT = 0; 1059 CAFFE = 1; 1060 CUDNN = 2; 1061 } 1062 optional Engine engine = 6 [default = DEFAULT]; 1063} 1064 1065message MemoryDataParameter { 1066 optional uint32 batch_size = 1; 1067 optional uint32 channels = 2; 1068 optional uint32 height = 3; 1069 optional uint32 width = 4; 1070} 1071 1072message MVNParameter { 1073 // This parameter can be set to false to normalize mean only 1074 optional bool normalize_variance = 1 [default = true]; 1075 1076 // This parameter can be set to true to perform DNN-like MVN 1077 optional bool across_channels = 2 [default = false]; 1078 1079 // Epsilon for not dividing by zero while normalizing variance 1080 optional float eps = 3 [default = 1e-9]; 1081} 1082 1083message ParameterParameter { 1084 optional BlobShape shape = 1; 1085} 1086 1087message PoolingParameter { 1088 enum PoolMethod { 1089 MAX = 0; 1090 AVE = 1; 1091 STOCHASTIC = 2; 1092 } 1093 optional PoolMethod pool = 1 [default = MAX]; // The pooling method 1094 // Pad, kernel size, and stride are all given as a single value for equal 1095 // dimensions in height and width or as Y, X pairs. 1096 optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) 1097 optional uint32 pad_h = 9 [default = 0]; // The padding height 1098 optional uint32 pad_w = 10 [default = 0]; // The padding width 1099 optional uint32 kernel_size = 2; // The kernel size (square) 1100 optional uint32 kernel_h = 5; // The kernel height 1101 optional uint32 kernel_w = 6; // The kernel width 1102 optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) 1103 optional uint32 stride_h = 7; // The stride height 1104 optional uint32 stride_w = 8; // The stride width 1105 enum Engine { 1106 DEFAULT = 0; 1107 CAFFE = 1; 1108 CUDNN = 2; 1109 } 1110 optional Engine engine = 11 [default = DEFAULT]; 1111 // If global_pooling then it will pool over the size of the bottom by doing 1112 // kernel_h = bottom->height and kernel_w = bottom->width 1113 optional bool global_pooling = 12 [default = false]; 1114 // Specify floor/ceil mode 1115 // source: https://github.com/BVLC/caffe/pull/3057 1116 optional bool ceil_mode = 13 [default = true]; 1117} 1118 1119message PowerParameter { 1120 // PowerLayer computes outputs y = (shift + scale * x) ^ power. 1121 optional float power = 1 [default = 1.0]; 1122 optional float scale = 2 [default = 1.0]; 1123 optional float shift = 3 [default = 0.0]; 1124} 1125 1126message PythonParameter { 1127 optional string module = 1; 1128 optional string layer = 2; 1129 // This value is set to the attribute `param_str` of the `PythonLayer` object 1130 // in Python before calling the `setup()` method. This could be a number, 1131 // string, dictionary in Python dict format, JSON, etc. You may parse this 1132 // string in `setup` method and use it in `forward` and `backward`. 1133 optional string param_str = 3 [default = '']; 1134 // Whether this PythonLayer is shared among worker solvers during data parallelism. 1135 // If true, each worker solver sequentially run forward from this layer. 1136 // This value should be set true if you are using it as a data layer. 1137 optional bool share_in_parallel = 4 [default = false]; 1138} 1139 1140// Message that stores parameters used by RecurrentLayer 1141message RecurrentParameter { 1142 // The dimension of the output (and usually hidden state) representation -- 1143 // must be explicitly set to non-zero. 1144 optional uint32 num_output = 1 [default = 0]; 1145 1146 optional FillerParameter weight_filler = 2; // The filler for the weight 1147 optional FillerParameter bias_filler = 3; // The filler for the bias 1148 1149 // Whether to enable displaying debug_info in the unrolled recurrent net. 1150 optional bool debug_info = 4 [default = false]; 1151 1152 // Whether to add as additional inputs (bottoms) the initial hidden state 1153 // blobs, and add as additional outputs (tops) the final timestep hidden state 1154 // blobs. The number of additional bottom/top blobs required depends on the 1155 // recurrent architecture -- e.g., 1 for RNNs, 2 for LSTMs. 1156 optional bool expose_hidden = 5 [default = false]; 1157} 1158 1159// Message that stores parameters used by ReductionLayer 1160message ReductionParameter { 1161 enum ReductionOp { 1162 SUM = 1; 1163 ASUM = 2; 1164 SUMSQ = 3; 1165 MEAN = 4; 1166 } 1167 1168 optional ReductionOp operation = 1 [default = SUM]; // reduction operation 1169 1170 // The first axis to reduce to a scalar -- may be negative to index from the 1171 // end (e.g., -1 for the last axis). 1172 // (Currently, only reduction along ALL "tail" axes is supported; reduction 1173 // of axis M through N, where N < num_axes - 1, is unsupported.) 1174 // Suppose we have an n-axis bottom Blob with shape: 1175 // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). 1176 // If axis == m, the output Blob will have shape 1177 // (d0, d1, d2, ..., d(m-1)), 1178 // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) 1179 // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. 1180 // If axis == 0 (the default), the output Blob always has the empty shape 1181 // (count 1), performing reduction across the entire input -- 1182 // often useful for creating new loss functions. 1183 optional int32 axis = 2 [default = 0]; 1184 1185 optional float coeff = 3 [default = 1.0]; // coefficient for output 1186} 1187 1188// Message that stores parameters used by ReLULayer 1189message ReLUParameter { 1190 // Allow non-zero slope for negative inputs to speed up optimization 1191 // Described in: 1192 // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities 1193 // improve neural network acoustic models. In ICML Workshop on Deep Learning 1194 // for Audio, Speech, and Language Processing. 1195 optional float negative_slope = 1 [default = 0]; 1196 enum Engine { 1197 DEFAULT = 0; 1198 CAFFE = 1; 1199 CUDNN = 2; 1200 } 1201 optional Engine engine = 2 [default = DEFAULT]; 1202} 1203 1204message ReshapeParameter { 1205 // Specify the output dimensions. If some of the dimensions are set to 0, 1206 // the corresponding dimension from the bottom layer is used (unchanged). 1207 // Exactly one dimension may be set to -1, in which case its value is 1208 // inferred from the count of the bottom blob and the remaining dimensions. 1209 // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: 1210 // 1211 // layer { 1212 // type: "Reshape" bottom: "input" top: "output" 1213 // reshape_param { ... } 1214 // } 1215 // 1216 // If "input" is 2D with shape 2 x 8, then the following reshape_param 1217 // specifications are all equivalent, producing a 3D blob "output" with shape 1218 // 2 x 2 x 4: 1219 // 1220 // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1221 // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } 1222 // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } 1223 // reshape_param { shape { dim: 0 dim:-1 dim: 4 } } 1224 // 1225 optional BlobShape shape = 1; 1226 1227 // axis and num_axes control the portion of the bottom blob's shape that are 1228 // replaced by (included in) the reshape. By default (axis == 0 and 1229 // num_axes == -1), the entire bottom blob shape is included in the reshape, 1230 // and hence the shape field must specify the entire output shape. 1231 // 1232 // axis may be non-zero to retain some portion of the beginning of the input 1233 // shape (and may be negative to index from the end; e.g., -1 to begin the 1234 // reshape after the last axis, including nothing in the reshape, 1235 // -2 to include only the last axis, etc.). 1236 // 1237 // For example, suppose "input" is a 2D blob with shape 2 x 8. 1238 // Then the following ReshapeLayer specifications are all equivalent, 1239 // producing a blob "output" with shape 2 x 2 x 4: 1240 // 1241 // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } 1242 // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } 1243 // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } 1244 // 1245 // num_axes specifies the extent of the reshape. 1246 // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on 1247 // input axes in the range [axis, axis+num_axes]. 1248 // num_axes may also be -1, the default, to include all remaining axes 1249 // (starting from axis). 1250 // 1251 // For example, suppose "input" is a 2D blob with shape 2 x 8. 1252 // Then the following ReshapeLayer specifications are equivalent, 1253 // producing a blob "output" with shape 1 x 2 x 8. 1254 // 1255 // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } 1256 // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } 1257 // reshape_param { shape { dim: 1 } num_axes: 0 } 1258 // 1259 // On the other hand, these would produce output blob shape 2 x 1 x 8: 1260 // 1261 // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } 1262 // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } 1263 // 1264 optional int32 axis = 2 [default = 0]; 1265 optional int32 num_axes = 3 [default = -1]; 1266} 1267 1268message ScaleParameter { 1269 // The first axis of bottom[0] (the first input Blob) along which to apply 1270 // bottom[1] (the second input Blob). May be negative to index from the end 1271 // (e.g., -1 for the last axis). 1272 // 1273 // For example, if bottom[0] is 4D with shape 100x3x40x60, the output 1274 // top[0] will have the same shape, and bottom[1] may have any of the 1275 // following shapes (for the given value of axis): 1276 // (axis == 0 == -4) 100; 100x3; 100x3x40; 100x3x40x60 1277 // (axis == 1 == -3) 3; 3x40; 3x40x60 1278 // (axis == 2 == -2) 40; 40x60 1279 // (axis == 3 == -1) 60 1280 // Furthermore, bottom[1] may have the empty shape (regardless of the value of 1281 // "axis") -- a scalar multiplier. 1282 optional int32 axis = 1 [default = 1]; 1283 1284 // (num_axes is ignored unless just one bottom is given and the scale is 1285 // a learned parameter of the layer. Otherwise, num_axes is determined by the 1286 // number of axes by the second bottom.) 1287 // The number of axes of the input (bottom[0]) covered by the scale 1288 // parameter, or -1 to cover all axes of bottom[0] starting from `axis`. 1289 // Set num_axes := 0, to multiply with a zero-axis Blob: a scalar. 1290 optional int32 num_axes = 2 [default = 1]; 1291 1292 // (filler is ignored unless just one bottom is given and the scale is 1293 // a learned parameter of the layer.) 1294 // The initialization for the learned scale parameter. 1295 // Default is the unit (1) initialization, resulting in the ScaleLayer 1296 // initially performing the identity operation. 1297 optional FillerParameter filler = 3; 1298 1299 // Whether to also learn a bias (equivalent to a ScaleLayer+BiasLayer, but 1300 // may be more efficient). Initialized with bias_filler (defaults to 0). 1301 optional bool bias_term = 4 [default = false]; 1302 optional FillerParameter bias_filler = 5; 1303} 1304 1305message SigmoidParameter { 1306 enum Engine { 1307 DEFAULT = 0; 1308 CAFFE = 1; 1309 CUDNN = 2; 1310 } 1311 optional Engine engine = 1 [default = DEFAULT]; 1312} 1313 1314message SliceParameter { 1315 // The axis along which to slice -- may be negative to index from the end 1316 // (e.g., -1 for the last axis). 1317 // By default, SliceLayer concatenates blobs along the "channels" axis (1). 1318 optional int32 axis = 3 [default = 1]; 1319 repeated uint32 slice_point = 2; 1320 1321 // DEPRECATED: alias for "axis" -- does not support negative indexing. 1322 optional uint32 slice_dim = 1 [default = 1]; 1323} 1324 1325// Message that stores parameters used by SoftmaxLayer, SoftmaxWithLossLayer 1326message SoftmaxParameter { 1327 enum Engine { 1328 DEFAULT = 0; 1329 CAFFE = 1; 1330 CUDNN = 2; 1331 } 1332 optional Engine engine = 1 [default = DEFAULT]; 1333 1334 // The axis along which to perform the softmax -- may be negative to index 1335 // from the end (e.g., -1 for the last axis). 1336 // Any other axes will be evaluated as independent softmaxes. 1337 optional int32 axis = 2 [default = 1]; 1338} 1339 1340message TanHParameter { 1341 enum Engine { 1342 DEFAULT = 0; 1343 CAFFE = 1; 1344 CUDNN = 2; 1345 } 1346 optional Engine engine = 1 [default = DEFAULT]; 1347} 1348 1349// Message that stores parameters used by TileLayer 1350message TileParameter { 1351 // The index of the axis to tile. 1352 optional int32 axis = 1 [default = 1]; 1353 1354 // The number of copies (tiles) of the blob to output. 1355 optional int32 tiles = 2; 1356} 1357 1358// Message that stores parameters used by ThresholdLayer 1359message ThresholdParameter { 1360 optional float threshold = 1 [default = 0]; // Strictly positive values 1361} 1362 1363message WindowDataParameter { 1364 // Specify the data source. 1365 optional string source = 1; 1366 // For data pre-processing, we can do simple scaling and subtracting the 1367 // data mean, if provided. Note that the mean subtraction is always carried 1368 // out before scaling. 1369 optional float scale = 2 [default = 1]; 1370 optional string mean_file = 3; 1371 // Specify the batch size. 1372 optional uint32 batch_size = 4; 1373 // Specify if we would like to randomly crop an image. 1374 optional uint32 crop_size = 5 [default = 0]; 1375 // Specify if we want to randomly mirror data. 1376 optional bool mirror = 6 [default = false]; 1377 // Foreground (object) overlap threshold 1378 optional float fg_threshold = 7 [default = 0.5]; 1379 // Background (non-object) overlap threshold 1380 optional float bg_threshold = 8 [default = 0.5]; 1381 // Fraction of batch that should be foreground objects 1382 optional float fg_fraction = 9 [default = 0.25]; 1383 // Amount of contextual padding to add around a window 1384 // (used only by the window_data_layer) 1385 optional uint32 context_pad = 10 [default = 0]; 1386 // Mode for cropping out a detection window 1387 // warp: cropped window is warped to a fixed size and aspect ratio 1388 // square: the tightest square around the window is cropped 1389 optional string crop_mode = 11 [default = "warp"]; 1390 // cache_images: will load all images in memory for faster access 1391 optional bool cache_images = 12 [default = false]; 1392 // append root_folder to locate images 1393 optional string root_folder = 13 [default = ""]; 1394} 1395 1396message SPPParameter { 1397 enum PoolMethod { 1398 MAX = 0; 1399 AVE = 1; 1400 STOCHASTIC = 2; 1401 } 1402 optional uint32 pyramid_height = 1; 1403 optional PoolMethod pool = 2 [default = MAX]; // The pooling method 1404 enum Engine { 1405 DEFAULT = 0; 1406 CAFFE = 1; 1407 CUDNN = 2; 1408 } 1409 optional Engine engine = 6 [default = DEFAULT]; 1410} 1411 1412// DEPRECATED: use LayerParameter. 1413message V1LayerParameter { 1414 repeated string bottom = 2; 1415 repeated string top = 3; 1416 optional string name = 4; 1417 repeated NetStateRule include = 32; 1418 repeated NetStateRule exclude = 33; 1419 enum LayerType { 1420 NONE = 0; 1421 ABSVAL = 35; 1422 ACCURACY = 1; 1423 ARGMAX = 30; 1424 BNLL = 2; 1425 CONCAT = 3; 1426 CONTRASTIVE_LOSS = 37; 1427 CONVOLUTION = 4; 1428 DATA = 5; 1429 DECONVOLUTION = 39; 1430 DROPOUT = 6; 1431 DUMMY_DATA = 32; 1432 EUCLIDEAN_LOSS = 7; 1433 ELTWISE = 25; 1434 EXP = 38; 1435 FLATTEN = 8; 1436 HDF5_DATA = 9; 1437 HDF5_OUTPUT = 10; 1438 HINGE_LOSS = 28; 1439 IM2COL = 11; 1440 IMAGE_DATA = 12; 1441 INFOGAIN_LOSS = 13; 1442 INNER_PRODUCT = 14; 1443 LRN = 15; 1444 MEMORY_DATA = 29; 1445 MULTINOMIAL_LOGISTIC_LOSS = 16; 1446 MVN = 34; 1447 POOLING = 17; 1448 POWER = 26; 1449 RELU = 18; 1450 SIGMOID = 19; 1451 SIGMOID_CROSS_ENTROPY_LOSS = 27; 1452 SILENCE = 36; 1453 SOFTMAX = 20; 1454 SOFTMAX_LOSS = 21; 1455 SPLIT = 22; 1456 SLICE = 33; 1457 TANH = 23; 1458 WINDOW_DATA = 24; 1459 THRESHOLD = 31; 1460 } 1461 optional LayerType type = 5; 1462 repeated BlobProto blobs = 6; 1463 repeated string param = 1001; 1464 repeated DimCheckMode blob_share_mode = 1002; 1465 enum DimCheckMode { 1466 STRICT = 0; 1467 PERMISSIVE = 1; 1468 } 1469 repeated float blobs_lr = 7; 1470 repeated float weight_decay = 8; 1471 repeated float loss_weight = 35; 1472 optional AccuracyParameter accuracy_param = 27; 1473 optional ArgMaxParameter argmax_param = 23; 1474 optional ConcatParameter concat_param = 9; 1475 optional ContrastiveLossParameter contrastive_loss_param = 40; 1476 optional ConvolutionParameter convolution_param = 10; 1477 optional DataParameter data_param = 11; 1478 optional DropoutParameter dropout_param = 12; 1479 optional DummyDataParameter dummy_data_param = 26; 1480 optional EltwiseParameter eltwise_param = 24; 1481 optional ExpParameter exp_param = 41; 1482 optional HDF5DataParameter hdf5_data_param = 13; 1483 optional HDF5OutputParameter hdf5_output_param = 14; 1484 optional HingeLossParameter hinge_loss_param = 29; 1485 optional ImageDataParameter image_data_param = 15; 1486 optional InfogainLossParameter infogain_loss_param = 16; 1487 optional InnerProductParameter inner_product_param = 17; 1488 optional LRNParameter lrn_param = 18; 1489 optional MemoryDataParameter memory_data_param = 22; 1490 optional MVNParameter mvn_param = 34; 1491 optional PoolingParameter pooling_param = 19; 1492 optional PowerParameter power_param = 21; 1493 optional ReLUParameter relu_param = 30; 1494 optional SigmoidParameter sigmoid_param = 38; 1495 optional SoftmaxParameter softmax_param = 39; 1496 optional SliceParameter slice_param = 31; 1497 optional TanHParameter tanh_param = 37; 1498 optional ThresholdParameter threshold_param = 25; 1499 optional WindowDataParameter window_data_param = 20; 1500 optional TransformationParameter transform_param = 36; 1501 optional LossParameter loss_param = 42; 1502 optional V0LayerParameter layer = 1; 1503} 1504 1505// DEPRECATED: V0LayerParameter is the old way of specifying layer parameters 1506// in Caffe. We keep this message type around for legacy support. 1507message V0LayerParameter { 1508 optional string name = 1; // the layer name 1509 optional string type = 2; // the string to specify the layer type 1510 1511 // Parameters to specify layers with inner products. 1512 optional uint32 num_output = 3; // The number of outputs for the layer 1513 optional bool biasterm = 4 [default = true]; // whether to have bias terms 1514 optional FillerParameter weight_filler = 5; // The filler for the weight 1515 optional FillerParameter bias_filler = 6; // The filler for the bias 1516 1517 optional uint32 pad = 7 [default = 0]; // The padding size 1518 optional uint32 kernelsize = 8; // The kernel size 1519 optional uint32 group = 9 [default = 1]; // The group size for group conv 1520 optional uint32 stride = 10 [default = 1]; // The stride 1521 enum PoolMethod { 1522 MAX = 0; 1523 AVE = 1; 1524 STOCHASTIC = 2; 1525 } 1526 optional PoolMethod pool = 11 [default = MAX]; // The pooling method 1527 optional float dropout_ratio = 12 [default = 0.5]; // dropout ratio 1528 1529 optional uint32 local_size = 13 [default = 5]; // for local response norm 1530 optional float alpha = 14 [default = 1.]; // for local response norm 1531 optional float beta = 15 [default = 0.75]; // for local response norm 1532 optional float k = 22 [default = 1.]; 1533 1534 // For data layers, specify the data source 1535 optional string source = 16; 1536 // For data pre-processing, we can do simple scaling and subtracting the 1537 // data mean, if provided. Note that the mean subtraction is always carried 1538 // out before scaling. 1539 optional float scale = 17 [default = 1]; 1540 optional string meanfile = 18; 1541 // For data layers, specify the batch size. 1542 optional uint32 batchsize = 19; 1543 // For data layers, specify if we would like to randomly crop an image. 1544 optional uint32 cropsize = 20 [default = 0]; 1545 // For data layers, specify if we want to randomly mirror data. 1546 optional bool mirror = 21 [default = false]; 1547 1548 // The blobs containing the numeric parameters of the layer 1549 repeated BlobProto blobs = 50; 1550 // The ratio that is multiplied on the global learning rate. If you want to 1551 // set the learning ratio for one blob, you need to set it for all blobs. 1552 repeated float blobs_lr = 51; 1553 // The weight decay that is multiplied on the global weight decay. 1554 repeated float weight_decay = 52; 1555 1556 // The rand_skip variable is for the data layer to skip a few data points 1557 // to avoid all asynchronous sgd clients to start at the same point. The skip 1558 // point would be set as rand_skip * rand(0,1). Note that rand_skip should not 1559 // be larger than the number of keys in the database. 1560 optional uint32 rand_skip = 53 [default = 0]; 1561 1562 // Fields related to detection (det_*) 1563 // foreground (object) overlap threshold 1564 optional float det_fg_threshold = 54 [default = 0.5]; 1565 // background (non-object) overlap threshold 1566 optional float det_bg_threshold = 55 [default = 0.5]; 1567 // Fraction of batch that should be foreground objects 1568 optional float det_fg_fraction = 56 [default = 0.25]; 1569 1570 // optional bool OBSOLETE_can_clobber = 57 [default = true]; 1571 1572 // Amount of contextual padding to add around a window 1573 // (used only by the window_data_layer) 1574 optional uint32 det_context_pad = 58 [default = 0]; 1575 1576 // Mode for cropping out a detection window 1577 // warp: cropped window is warped to a fixed size and aspect ratio 1578 // square: the tightest square around the window is cropped 1579 optional string det_crop_mode = 59 [default = "warp"]; 1580 1581 // For ReshapeLayer, one needs to specify the new dimensions. 1582 optional int32 new_num = 60 [default = 0]; 1583 optional int32 new_channels = 61 [default = 0]; 1584 optional int32 new_height = 62 [default = 0]; 1585 optional int32 new_width = 63 [default = 0]; 1586 1587 // Whether or not ImageLayer should shuffle the list of files at every epoch. 1588 // It will also resize images if new_height or new_width are not zero. 1589 optional bool shuffle_images = 64 [default = false]; 1590 1591 // For ConcatLayer, one needs to specify the dimension for concatenation, and 1592 // the other dimensions must be the same for all the bottom blobs. 1593 // By default it will concatenate blobs along the channels dimension. 1594 optional uint32 concat_dim = 65 [default = 1]; 1595 1596 optional HDF5OutputParameter hdf5_output_param = 1001; 1597} 1598 1599message PReLUParameter { 1600 // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: 1601 // Surpassing Human-Level Performance on ImageNet Classification, 2015. 1602 1603 // Initial value of a_i. Default is a_i=0.25 for all i. 1604 optional FillerParameter filler = 1; 1605 // Whether or not slope parameters are shared across channels. 1606 optional bool channel_shared = 2 [default = false]; 1607} 1608 1609// The normalized bounding box [0, 1] w.r.t. the input image size. 1610message NormalizedBBox { 1611 optional float xmin = 1; 1612 optional float ymin = 2; 1613 optional float xmax = 3; 1614 optional float ymax = 4; 1615 optional int32 label = 5; 1616 optional bool difficult = 6; 1617 optional float score = 7; 1618 optional float size = 8; 1619} 1620 1621// origin: https://github.com/rbgirshick/caffe-fast-rcnn/tree/fast-rcnn 1622// Message that stores parameters used by ROIPoolingLayer 1623message ROIPoolingParameter { 1624 // Pad, kernel size, and stride are all given as a single value for equal 1625 // dimensions in height and width or as Y, X pairs. 1626 optional uint32 pooled_h = 1 [default = 0]; // The pooled output height 1627 optional uint32 pooled_w = 2 [default = 0]; // The pooled output width 1628 // Multiplicative spatial scale factor to translate ROI coords from their 1629 // input scale to the scale used when pooling 1630 optional float spatial_scale = 3 [default = 1]; 1631} 1632 1633message ProposalParameter { 1634 optional uint32 feat_stride = 1 [default = 16]; 1635 optional uint32 base_size = 2 [default = 16]; 1636 optional uint32 min_size = 3 [default = 16]; 1637 repeated float ratio = 4; 1638 repeated float scale = 5; 1639 optional uint32 pre_nms_topn = 6 [default = 6000]; 1640 optional uint32 post_nms_topn = 7 [default = 300]; 1641 optional float nms_thresh = 8 [default = 0.7]; 1642} 1643 1644// origin: https://github.com/daijifeng001/caffe-rfcn 1645message PSROIPoolingParameter { 1646 required float spatial_scale = 1; 1647 required int32 output_dim = 2; // output channel number 1648 required int32 group_size = 3; // equal to pooled_size 1649} 1650