// Copyright (C) 2015  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_DNn_CORE_ABSTRACT_H_
#ifdef DLIB_DNn_CORE_ABSTRACT_H_

#include "../cuda/tensor_abstract.h"
#include <memory>
#include <type_traits>
#include <tuple>
#include <vector>
#include "../rand.h"


namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename... T
        >
    auto tuple_tail(
        const std::tuple<T...>& item
    );
    /*!
        ensures
            - returns a tuple that contains everything in item except for tuple_head(item).
              The items will be in the same order as they are in item, just without
              tuple_head(item).
            - This function will correctly handle nested tuples.
    !*/

    template <typename... T>
    auto tuple_head (
        const std::tuple<T...>& item
    );
    /*!
        ensures
            - returns a copy of the first thing in the tuple that isn't a std::tuple.
              Essentially, this function calls std::get<0>() recursively on item until
              a non-std::tuple object is found.
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    double get_learning_rate_multiplier(
        const T& obj
    );
    /*!
        ensures
            - if (obj has a get_learning_rate_multiplier() member function) then
                - returns obj.get_learning_rate_multiplier()
            - else
                - returns 1
    !*/

    template <typename T>
    void set_learning_rate_multiplier(
        T& obj,
        double learning_rate_multiplier
    );
    /*!
        requires
            - learning_rate_multiplier >= 0
        ensures
            - if (obj has a set_learning_rate_multiplier() member function) then
                - calls obj.set_learning_rate_multiplier(learning_rate_multiplier)
            - else
                - does nothing
    !*/
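    // The following is a minimal sketch, not part of dlib, of how a helper like
    // get_learning_rate_multiplier() can be implemented: detect the member function
    // at compile time and fall back to the documented default of 1.  It assumes
    // C++17 (std::void_t, if constexpr) plus <utility> for std::declval, and the
    // sketch_* names are hypothetical.
    template <typename T, typename = void>
    struct sketch_has_lr_multiplier : std::false_type {};

    template <typename T>
    struct sketch_has_lr_multiplier<T, std::void_t<
        decltype(std::declval<const T&>().get_learning_rate_multiplier())>>
        : std::true_type {};

    template <typename T>
    double sketch_get_learning_rate_multiplier(const T& obj)
    {
        if constexpr (sketch_has_lr_multiplier<T>::value)
            return obj.get_learning_rate_multiplier();  // forward to the member
        else
            return 1;                                   // documented default
    }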
// ----------------------------------------------------------------------------------------

    template <typename T>
    double get_bias_learning_rate_multiplier(
        const T& obj
    );
    /*!
        ensures
            - if (obj has a get_bias_learning_rate_multiplier() member function) then
                - returns obj.get_bias_learning_rate_multiplier()
            - else
                - returns 1
    !*/

    template <typename T>
    void set_bias_learning_rate_multiplier(
        T& obj,
        double bias_learning_rate_multiplier
    );
    /*!
        requires
            - bias_learning_rate_multiplier >= 0
        ensures
            - if (obj has a set_bias_learning_rate_multiplier() member function) then
                - calls obj.set_bias_learning_rate_multiplier(bias_learning_rate_multiplier)
            - else
                - does nothing
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    double get_weight_decay_multiplier(
        const T& obj
    );
    /*!
        ensures
            - if (obj has a get_weight_decay_multiplier() member function) then
                - returns obj.get_weight_decay_multiplier()
            - else
                - returns 1
    !*/

    template <typename T>
    void set_weight_decay_multiplier(
        T& obj,
        double weight_decay_multiplier
    );
    /*!
        requires
            - weight_decay_multiplier >= 0
        ensures
            - if (obj has a set_weight_decay_multiplier() member function) then
                - calls obj.set_weight_decay_multiplier(weight_decay_multiplier)
            - else
                - does nothing
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    double get_bias_weight_decay_multiplier(
        const T& obj
    );
    /*!
        ensures
            - if (obj has a get_bias_weight_decay_multiplier() member function) then
                - returns obj.get_bias_weight_decay_multiplier()
            - else
                - returns 1
    !*/

    template <typename T>
    void set_bias_weight_decay_multiplier(
        T& obj,
        double bias_weight_decay_multiplier
    );
    /*!
        requires
            - bias_weight_decay_multiplier >= 0
        ensures
            - if (obj has a set_bias_weight_decay_multiplier() member function) then
                - calls obj.set_bias_weight_decay_multiplier(bias_weight_decay_multiplier)
            - else
                - does nothing
    !*/

// ----------------------------------------------------------------------------------------

    template <typename T>
    void disable_bias(
        T& obj
    );
    /*!
        ensures
            - if (obj has a disable_bias() member function) then
                - calls obj.disable_bias()
            - else
                - does nothing
    !*/

// ----------------------------------------------------------------------------------------

    bool dnn_prefer_fastest_algorithms(
    );
    /*!
        ensures
            - If dlib should prefer to use fast algorithms rather than ones that use
              less RAM then this function returns true, and false otherwise.
            - On program startup this function will default to true.
    !*/

    void set_dnn_prefer_fastest_algorithms(
    );
    /*!
        ensures
            - #dnn_prefer_fastest_algorithms() == true
    !*/

    void set_dnn_prefer_smallest_algorithms(
    );
    /*!
        ensures
            - #dnn_prefer_fastest_algorithms() == false
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T
        >
    class sstack
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This is a basic stack of T objects.  It contains no data itself but
                simply points to a memory range of T objects and allows you to access
                that block of T objects as a stack.
        !*/

    public:
        typedef T value_type;

        sstack() = delete;

        sstack (
            T* data,
            size_t s
        );
        /*!
            ensures
                - #size() == s
                - #top() == *data
                - #pop(i).top() == data[i]
        !*/

        const T& top(
        ) const;
        /*!
            requires
                - size() != 0
            ensures
                - returns the top element of the stack.
        !*/

        T& top(
        );
        /*!
            requires
                - size() != 0
            ensures
                - returns the top element of the stack.
        !*/

        size_t size(
        ) const;
        /*!
            ensures
                - returns the number of elements in this stack.
        !*/

        sstack pop(
            size_t num = 1
        );
        /*!
            requires
                - num <= size()
            ensures
                - returns the sub-stack S such that:
                    - S.size() == size()-num.
                    - S.top() is num elements down the stack.
        !*/
    };

    template <
        typename T
        >
    sstack<T> make_sstack(
        std::vector<T>& item
    ) { return sstack<T>(item.data(), item.size()); }
    /*!
        ensures
            - returns an sstack that sits on top of the given std::vector.
    !*/
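    // A brief usage sketch for make_sstack(), illustrative only (the function
    // sketch_sstack_demo is hypothetical).  It builds a stack over a vector and
    // walks it with pop(), which is also how a stack of solver objects is consumed
    // by update_parameters() further below.
    inline void sketch_sstack_demo()
    {
        std::vector<float> vals = {7, 8, 9};
        sstack<float> s = make_sstack(vals);
        // s.top() == 7 and s.size() == 3.
        // pop() peels elements off the top: s.pop(1).top() == 8 and
        // s.pop(2).top() == 9.  The vector itself still owns the memory.
    }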
// ----------------------------------------------------------------------------------------

    template <
        typename LAYER_DETAILS,
        typename SUBNET
        >
    class add_layer
    {
        /*!
            REQUIREMENTS ON LAYER_DETAILS
                - Must be a type that implements the EXAMPLE_COMPUTATIONAL_LAYER_ interface
                  defined in layers_abstract.h

            REQUIREMENTS ON SUBNET
                - One of the following must be true:
                    - SUBNET implements the EXAMPLE_INPUT_LAYER interface defined in
                      input_abstract.h.
                    - SUBNET is an add_layer object.
                    - SUBNET is an add_tag_layer object.
                    - SUBNET is an add_skip_layer object.
                    - SUBNET is a repeat object.

            WHAT THIS OBJECT REPRESENTS
                This object represents a deep neural network.  In particular, it is a tool
                for adding another layer on top of the neural network of type SUBNET, which
                is specified as a template argument.  The specific layer added is defined
                by the LAYER_DETAILS template argument.
        !*/
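        /*!
            EXAMPLE (illustrative, not part of the interface)
                The computational layers declared in layers_abstract.h come with
                template aliases (e.g. fc, relu) that wrap them in add_layer, so a
                network type is written as a nested template expression like:
                    using sketch_net_type = relu<fc<10, input<matrix<float>>>>;
                i.e. an add_layer holding a relu_ on top of an add_layer holding an
                fc_ on top of the input layer.
        !*/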
    public:
        typedef LAYER_DETAILS layer_details_type;
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        // num_computational_layers will always give the number of layers in the network
        // that transform tensors (i.e. layers defined by something that implements the
        // EXAMPLE_COMPUTATIONAL_LAYER_ interface).  This is all the layers except for
        // loss, tag, and skip layers.
        const static size_t num_computational_layers = subnet_type::num_computational_layers + 1;
        // num_layers counts all the layers in the network regardless of their type.
        const static size_t num_layers = subnet_type::num_layers + 1;

        add_layer(
        );
        /*!
            ensures
                - default constructs all the layers in this network.
                - #sample_expansion_factor() == 0
        !*/

        add_layer(const add_layer&) = default;
        add_layer(add_layer&&) = default;
        add_layer& operator=(add_layer&&) = default;
        add_layer& operator=(const add_layer&) = default;
        /*!
            ensures
                - this object is copyable and movable.
        !*/

        template <typename T, typename U>
        add_layer(
            const add_layer<T,U>& item
        );
        /*!
            ensures
                - This constructor allows you to copy neural network objects from one to
                  another as long as their corresponding layers can be constructed from
                  each other.
                - #layer_details() == layer_details_type(item.layer_details())
                - #subnet() == subnet_type(item.subnet())
                - #sample_expansion_factor() == item.sample_expansion_factor()
        !*/

        template <typename ...T, typename LD, typename ...U>
        add_layer(
            const std::tuple<LD,U...>& layer_det,
            T&& ...args
        );
        /*!
            ensures
                - #layer_details() == layer_details_type(tuple_head(layer_det))
                - #subnet() == subnet_type(tuple_tail(layer_det),args)
                - #sample_expansion_factor() == 0
        !*/

        template <typename ...T>
        add_layer(
            const layer_details_type& layer_det,
            T&& ...args
        );
        /*!
            ensures
                - #layer_details() == layer_details_type(layer_det)
                - #subnet() == subnet_type(args)
                - #sample_expansion_factor() == 0
        !*/

        template <typename ...T>
        add_layer(
            T&& ...args
        );
        /*!
            ensures
                - This version of the constructor is only called if layer_details_type
                  can't be constructed from the first thing in args.  In this case, the
                  args are simply passed on to the sub layers in their entirety.
                - #layer_details() == layer_details_type()
                - #subnet() == subnet_type(args)
                - #sample_expansion_factor() == 0
        !*/

        template <typename ...T>
        add_layer(
            layer_details_type&& layer_det,
            T&& ...args
        );
        /*!
            ensures
                - #layer_details() == layer_det
                - #subnet() == subnet_type(args)
                - #sample_expansion_factor() == 0
        !*/

        template <typename forward_iterator>
        void to_tensor (
            forward_iterator ibegin,
            forward_iterator iend,
            resizable_tensor& data
        ) const;
        /*!
            requires
                - [ibegin, iend) is an iterator range over input_type objects.
                - std::distance(ibegin,iend) > 0
            ensures
                - Converts the iterator range into a tensor and stores it into #data.
                - #data.num_samples()%distance(ibegin,iend) == 0.
                - #sample_expansion_factor() == #data.num_samples()/distance(ibegin,iend).
                - #sample_expansion_factor() > 0
                - The data in the ith sample of #data corresponds to the input_type object
                  *(ibegin+i/#sample_expansion_factor()).
                - Invokes data.async_copy_to_device() so that the data begins transferring
                  to the GPU device, if present.
                - This function is implemented by calling the to_tensor() routine defined
                  at the input layer of this network.
        !*/

        unsigned int sample_expansion_factor (
        ) const;
        /*!
            ensures
                - When to_tensor() is invoked on this network's input layer it converts N
                  input objects into M samples, all stored inside a resizable_tensor.  It
                  is always the case that M is some integer multiple of N.
                  sample_expansion_factor() returns the value of this multiplier.  To be
                  very specific, it is always true that M==I*N where I is some integer.
                  This integer I is what is returned by sample_expansion_factor().
        !*/
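        /*!
            EXAMPLE (illustrative)
                If the input layer expands each input object into 2 tensor samples
                (say, an image and its mirrored copy) then calling to_tensor() on 10
                input objects produces #data.num_samples() == 20 and
                sample_expansion_factor() == 2, and sample i of the tensor
                corresponds to input object i/2.
        !*/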
        const subnet_type& subnet(
        ) const;
        /*!
            ensures
                - returns the immediate subnetwork of *this network.
        !*/

        subnet_type& subnet(
        );
        /*!
            ensures
                - returns the immediate subnetwork of *this network.
        !*/

        const layer_details_type& layer_details(
        ) const;
        /*!
            ensures
                - returns the layer_details_type instance that defines the behavior of the
                  layer at the top of this network.  I.e. returns the layer details that
                  defines the behavior of the layer nearest to the network output rather
                  than the input layer.
        !*/

        layer_details_type& layer_details(
        );
        /*!
            ensures
                - returns the layer_details_type instance that defines the behavior of the
                  layer at the top of this network.  I.e. returns the layer details that
                  defines the behavior of the layer nearest to the network output rather
                  than the input layer.
        !*/

        template <typename forward_iterator>
        const tensor& operator() (
            forward_iterator ibegin,
            forward_iterator iend
        );
        /*!
            requires
                - [ibegin, iend) is an iterator range over input_type objects.
                - std::distance(ibegin,iend) > 0
            ensures
                - runs [ibegin,iend) through the network and returns the results.
                  In particular, this function performs:
                    to_tensor(ibegin,iend,temp_tensor);
                    return forward(temp_tensor);
                - The return value from this function is also available in #get_output().
                  i.e. this function returns #get_output().
                - have_same_dimensions(#get_gradient_input(), #get_output()) == true.
                - All elements of #get_gradient_input() are set to 0.
                  i.e. calling this function clears out #get_gradient_input() and ensures
                  it has the same dimensions as the most recent output.
        !*/

        const tensor& operator() (
            const input_type& x
        );
        /*!
            ensures
                - runs a single x through the network and returns the output.
                  I.e. returns (*this)(&x, &x+1);
        !*/

        const tensor& forward(
            const tensor& x
        );
        /*!
            requires
                - sample_expansion_factor() != 0
                  (i.e. to_tensor() must have been called to set sample_expansion_factor()
                  to something non-zero.)
                - x.num_samples()%sample_expansion_factor() == 0
                - x.num_samples() > 0
            ensures
                - Runs x through the network and returns the results.  In particular, this
                  function performs the equivalent of:
                    subnet().forward(x);
                    if (this is the first time forward() has been called) then
                        layer_details().setup(subnet());
                    layer_details().forward(subnet(), get_output());
                - The return value from this function is also available in #get_output().
                  i.e. this function returns #get_output().
                - have_same_dimensions(#get_gradient_input(), #get_output()) == true
                - All elements of #get_gradient_input() are set to 0.
                  i.e. calling this function clears out #get_gradient_input() and ensures
                  it has the same dimensions as the most recent output.
        !*/

        const tensor& get_output(
        ) const;
        /*!
            ensures
                - returns the output for the last tensor that was run through the network.
                  If nothing has been run through the network yet then returns an empty
                  tensor.
        !*/

        tensor& get_gradient_input(
        );
        /*!
            ensures
                - returns the error gradient for this network.  That is, this is the error
                  gradient that this network will use to compute parameter gradients when
                  back_propagate_error() is called.  Therefore, when performing back
                  propagation, layers that sit on top of this network layer write their
                  back-propagated error gradients into get_gradient_input().  Or to put it
                  another way, during back-propagation, layers take the contents of their
                  get_gradient_input() and back-propagate it through themselves and store
                  the result into their subnetwork's get_gradient_input().

                  This means you should consider get_gradient_input() as an input to the
                  back_propagate_error() method.
        !*/

        const tensor& get_final_data_gradient(
        ) const;
        /*!
            ensures
                - if back_propagate_error() has been called to back-propagate a gradient
                  through this network then you can call get_final_data_gradient() to
                  obtain the last data gradient computed.  That is, this function returns
                  the gradient of the network with respect to its inputs.
                - Note that there is only one "final data gradient" for an entire network,
                  not one per layer, since there is only one input to the entire network.
        !*/

        const tensor& get_parameter_gradient(
        ) const;
        /*!
            ensures
                - if back_propagate_error() has been called then you can call
                  get_parameter_gradient() to find the gradient of this layer's parameters.
                  When we update the parameters by calling update_parameters(), it will use
                  the gradient in get_parameter_gradient() to perform the update.
                  Therefore, you should consider get_parameter_gradient() as an input to
                  update_parameters().
        !*/

        tensor& get_parameter_gradient (
        );
        /*!
            ensures
                - returns a non-const reference to the tensor returned by the above
                  get_parameter_gradient() method.  You could use this method to modify the
                  parameter gradient in some way before invoking update_parameters().
        !*/
        void back_propagate_error(
            const tensor& x
        );
        /*!
            requires
                - forward(x) was called to forward propagate x through the network.
                  Moreover, this was the most recent call to forward() and x has not been
                  subsequently modified in any way.
                - get_gradient_input() has been set equal to the gradient of this network's
                  output with respect to some loss function.
            ensures
                - Back propagates the error gradient, get_gradient_input(), through this
                  network and computes parameter and data gradients, via backpropagation.
                  Specifically, this function populates get_final_data_gradient() and also,
                  for each layer, the tensor returned by get_parameter_gradient().
                - All elements of #get_gradient_input() are set to 0.
                - have_same_dimensions(#get_final_data_gradient(), x) == true.
                - have_same_dimensions(#get_parameter_gradient(), layer_details().get_layer_params()) == true.
                - #get_final_data_gradient() contains the gradient of the network with
                  respect to x.
        !*/

        void back_propagate_error(
            const tensor& x,
            const tensor& gradient_input
        );
        /*!
            requires
                - forward(x) was called to forward propagate x through the network.
                  Moreover, this was the most recent call to forward() and x has not been
                  subsequently modified in any way.
                - have_same_dimensions(gradient_input, get_output()) == true
            ensures
                - This function is identical to the version of back_propagate_error()
                  defined immediately above except that it back-propagates gradient_input
                  through the network instead of get_gradient_input().  Therefore, this
                  version of back_propagate_error() is equivalent to performing:
                    get_gradient_input() = gradient_input;
                    back_propagate_error(x);
                  Except that calling back_propagate_error(x,gradient_input) avoids the
                  copy and is therefore slightly more efficient.
                - All elements of #get_gradient_input() are set to 0.
                - have_same_dimensions(#get_final_data_gradient(), x) == true.
                - have_same_dimensions(#get_parameter_gradient(), layer_details().get_layer_params()) == true.
                - #get_final_data_gradient() contains the gradient of the network with
                  respect to x.
        !*/

        template <typename solver_type>
        void update_parameters(
            sstack<solver_type> solvers,
            double learning_rate
        );
        /*!
            requires
                - solver_type is an implementation of the EXAMPLE_SOLVER interface defined
                  in solvers_abstract.h
                - back_propagate_error() has been called.
                - The given solvers have only ever been used with this network.  That is,
                  if you want to call update_parameters() on some other neural network
                  object then you must NOT reuse the same solvers object.
                - solvers.size() >= num_computational_layers
                - 0 < learning_rate <= 1
            ensures
                - Updates all the parameters in the network.  In particular, we pass each
                  layer's parameter gradient (i.e. the tensor returned by the layer's
                  get_parameter_gradient() member) through that layer's corresponding
                  solver object.  This produces a parameter delta vector which we add to
                  the layer's parameters.
                - The solvers use the given learning rate.
        !*/
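        /*!
            EXAMPLE (an illustrative sketch of one manual training step; net, samples,
            and loss_grad are hypothetical, and the loss gradient is set by hand since
            add_layer has no loss layer attached):
                resizable_tensor x;
                net.to_tensor(samples.begin(), samples.end(), x);
                net.forward(x);
                net.get_gradient_input() = loss_grad;  // gradient of loss w.r.t. get_output()
                net.back_propagate_error(x);
                std::vector<sgd> solvers(net_type::num_computational_layers);
                net.update_parameters(make_sstack(solvers), 0.01);
        !*/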
        template <typename solver_type>
        void update_parameters(std::vector<solver_type>& solvers, double learning_rate)
        { update_parameters(make_sstack(solvers), learning_rate); }
        /*!
            Convenience method for calling update_parameters()
        !*/

        void clean(
        );
        /*!
            ensures
                - Causes the network to forget about everything but its parameters.
                  That is, for each layer we will have:
                    - get_output().num_samples() == 0
                    - get_gradient_input().num_samples() == 0
                  However, running new input data through this network will still produce
                  the same output it would have produced regardless of any calls to
                  clean().  The purpose of clean() is to compact the network object prior
                  to saving it to disk so that it takes up less space and the IO is
                  quicker.
                - This also calls the .clean() method on any layer details objects that
                  define a .clean() method.
        !*/

    };

    template <typename T, typename U>
    std::ostream& operator<<(std::ostream& out, const add_layer<T,U>& item);
    /*!
        prints the network architecture to the given output stream.
    !*/

    template <typename T, typename U>
    void serialize(const add_layer<T,U>& item, std::ostream& out);
    template <typename T, typename U>
    void deserialize(add_layer<T,U>& item, std::istream& in);
    /*!
        provides serialization support
    !*/

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    class no_label_type;

    template <
        typename LOSS_DETAILS,
        typename SUBNET
        >
    class add_loss_layer
    {
        /*!
            REQUIREMENTS ON LOSS_DETAILS
                - Must be a type that implements the EXAMPLE_LOSS_LAYER_ interface defined
                  in loss_abstract.h

            REQUIREMENTS ON SUBNET
                - One of the following must be true:
                    - SUBNET is an add_layer object.
                    - SUBNET is an add_tag_layer object.
                    - SUBNET is an add_skip_layer object.
                    - SUBNET is a repeat object.

            WHAT THIS OBJECT REPRESENTS
                This object represents a deep neural network.  In particular, it is a tool
                for adding a loss layer on top of the neural network of type SUBNET, which
                is specified as a template argument.  The specific layer added is defined
                by the LOSS_DETAILS template argument.  Importantly, a loss layer is the
                last layer in a deep neural network.  So once it is added you can't add
                any other layers of any type.
        !*/
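        /*!
            EXAMPLE (illustrative, not part of the interface)
                A complete network type is therefore a loss layer wrapped around a stack
                of computational layers ending in an input layer, e.g.:
                    using sketch_net_type =
                        loss_multiclass_log<fc<10, relu<fc<84, input<matrix<unsigned char>>>>>>;
                where loss_multiclass_log is the template alias from loss_abstract.h that
                wraps the stack in an add_loss_layer.
        !*/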
    public:
        typedef LOSS_DETAILS loss_details_type;
        typedef SUBNET subnet_type;
        typedef typename subnet_type::input_type input_type;
        const static size_t num_computational_layers = subnet_type::num_computational_layers;
        const static size_t num_layers = subnet_type::num_layers + 1;
        // If LOSS_DETAILS is an unsupervised loss then training_label_type==no_label_type.
        // Otherwise it is defined as follows:
        typedef typename LOSS_DETAILS::training_label_type training_label_type;
        // Similarly, if LOSS_DETAILS doesn't provide any output conversion then
        // output_label_type==no_label_type.
        typedef typename LOSS_DETAILS::output_label_type output_label_type;



        add_loss_layer() = default;
        /*!
            ensures
                - default constructs all the layers in this network.
        !*/

        add_loss_layer(const add_loss_layer&) = default;
        add_loss_layer(add_loss_layer&&) = default;
        add_loss_layer& operator=(add_loss_layer&&) = default;
        add_loss_layer& operator=(const add_loss_layer&) = default;
        /*!
            ensures
                - this object is copyable and movable.
        !*/

        template <typename T, typename U>
        add_loss_layer(
            const add_loss_layer<T,U>& item
        );
        /*!
            ensures
                - This constructor allows you to copy neural network objects from one to
                  another as long as their corresponding layers can be constructed from
                  each other.
                - #loss_details() == loss_details_type(item.loss_details())
                - #subnet() == subnet_type(item.subnet())
        !*/

        template <typename ...T>
        add_loss_layer(
            const LOSS_DETAILS& layer_det,
            T&& ...args
        );
        /*!
            ensures
                - #loss_details() == loss_details_type(layer_det)
                - #subnet() == subnet_type(args)
        !*/

        template <typename ...T>
        add_loss_layer(
            LOSS_DETAILS&& layer_det,
            T&& ...args
        );
        /*!
            ensures
                - #loss_details() == loss_details_type(layer_det)
                - #subnet() == subnet_type(args)
        !*/

        template <typename ...T>
        add_loss_layer(
            T&& ...args
        );
        /*!
            ensures
                - This version of the constructor is only called if loss_details_type can't
                  be constructed from the first thing in args.  In this case, the args are
                  simply passed on to the sub layers in their entirety.
                - #loss_details() == loss_details_type()
                - #subnet() == subnet_type(args)
        !*/

        const subnet_type& subnet(
        ) const;
        /*!
            ensures
                - returns the immediate subnetwork of *this network.
        !*/

        subnet_type& subnet(
        );
        /*!
            ensures
                - returns the immediate subnetwork of *this network.
        !*/

        const loss_details_type& loss_details(
        ) const;
        /*!
            ensures
                - returns the loss_details_type instance that defines the behavior of the
                  loss layer used by this network.
        !*/

        loss_details_type& loss_details(
        );
        /*!
            ensures
                - returns the loss_details_type instance that defines the behavior of the
                  loss layer used by this network.
        !*/

        template <typename forward_iterator>
        void to_tensor (
            forward_iterator ibegin,
            forward_iterator iend,
            resizable_tensor& data
        ) const;
        /*!
            requires
                - [ibegin, iend) is an iterator range over input_type objects.
                - std::distance(ibegin,iend) > 0
            ensures
                - Converts the iterator range into a tensor and stores it into #data.
                - #data.num_samples()%distance(ibegin,iend) == 0.
                - #sample_expansion_factor() == #data.num_samples()/distance(ibegin,iend).
                - #sample_expansion_factor() > 0
                - The data in the ith sample of #data corresponds to the input_type object
                  *(ibegin+i/sample_expansion_factor()).
                - Invokes data.async_copy_to_device() so that the data begins transferring
                  to the GPU device, if present.
                - This function is implemented by calling the to_tensor() routine defined
                  at the input layer of this network.
        !*/
        unsigned int sample_expansion_factor (
        ) const;
        /*!
            ensures
                - When to_tensor() is invoked on this network's input layer it converts N
                  input objects into M samples, all stored inside a resizable_tensor.  It
                  is always the case that M is some integer multiple of N.
                  sample_expansion_factor() returns the value of this multiplier.  To be
                  very specific, it is always true that M==I*N where I is some integer.
                  This integer I is what is returned by sample_expansion_factor().
        !*/

        // -------------

        const tensor& forward(
            const tensor& x
        );
        /*!
            requires
                - sample_expansion_factor() != 0
                  (i.e. to_tensor() must have been called to set sample_expansion_factor()
                  to something non-zero.)
                - x.num_samples()%sample_expansion_factor() == 0
                - x.num_samples() > 0
            ensures
                - Runs x through the network and returns the results as a tensor.  In particular,
                  this function just performs:
                    return subnet().forward(x);
                  So if you want to get the outputs as an output_label_type then call one of the
                  methods below instead, like operator().
                - The return value from this function is also available in #subnet().get_output().
                  i.e. this function returns #subnet().get_output().
                - have_same_dimensions(#subnet().get_gradient_input(), #subnet().get_output()) == true
                - All elements of #subnet().get_gradient_input() are set to 0.
                  i.e. calling this function clears out #subnet().get_gradient_input() and ensures
                  it has the same dimensions as the most recent output.
        !*/

        template <typename output_iterator>
        void operator() (
            const tensor& x,
            output_iterator obegin
        );
        /*!
            requires
                - sample_expansion_factor() != 0
                  (i.e. to_tensor() must have been called to set sample_expansion_factor()
                  to something non-zero.)
                - x.num_samples()%sample_expansion_factor() == 0
                - x.num_samples() > 0
                - obegin == iterator pointing to the start of a range of
                  x.num_samples()/sample_expansion_factor() output_label_type elements.
            ensures
                - runs x through the network and writes the output to the range at obegin.
                - loss_details().to_label() is used to write the network output into
                  obegin.
        !*/

        template <typename forward_iterator, typename label_iterator>
        void operator() (
            forward_iterator ibegin,
            forward_iterator iend,
            label_iterator obegin
        );
        /*!
            requires
                - [ibegin, iend) is an iterator range over input_type objects.
                - std::distance(ibegin,iend) > 0
                - obegin == iterator pointing to the start of a range of
                  std::distance(ibegin,iend) output_label_type elements.
            ensures
                - runs [ibegin,iend) through the network and writes the output to the range
                  at obegin.
                - loss_details().to_label() is used to write the network output into
                  obegin.
        !*/

        // -------------

        const output_label_type& operator() (
            const input_type& x
        );
        /*!
            ensures
                - runs a single object, x, through the network and returns the output.
                - loss_details().to_label() is used to convert the network output into an
                  output_label_type.
        !*/

        template <typename iterable_type>
        std::vector<output_label_type> operator() (
            const iterable_type& data,
            size_t batch_size = 128
        );
        /*!
            requires
                - batch_size > 0
                - data must have a .begin() and .end() that supply iterators over a
                  sequence of input_type elements.  E.g. data could have a type of
                  std::vector<input_type>
            ensures
                - runs all the objects in data through the network and returns their
                  predicted labels.  This means this function returns a vector V such that:
                    - V.size() == data.size()
                    - for all valid i: V[i] == the predicted label of data[i].
                - Elements of data are run through the network in batches of batch_size
                  items.  Using a batch_size > 1 can be faster because it better exploits
                  the available hardware parallelism.
                - loss_details().to_label() is used to convert the network output into an
                  output_label_type.
        !*/
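        /*!
            EXAMPLE (illustrative; net, one_sample, and many_samples are hypothetical)
                output_label_type y = net(one_sample);
                std::vector<output_label_type> ys = net(many_samples, 128);
        !*/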
        template <typename ...T>
        const output_label_type& process (
            const input_type& x,
            T&& ...args
        );
        /*!
            ensures
                - This function is just like (*this)(x), i.e. it runs a single object, x,
                  through the network and returns the output.  But we additionally pass the
                  given args to loss_details().to_label() as the 4th argument (or more,
                  depending on how many things are in args) when converting the network
                  output to an output_label_type.  This is useful, for instance, with loss
                  layers like loss_mmod_ which has an optional adjust_threshold argument to
                  to_label() that adjusts the detection threshold.  Therefore, for such
                  networks you could call them like: net.process(some_image, -0.5), and -0.5
                  would be passed as the adjust_threshold argument of to_label().
        !*/

        template <typename iterable_type, typename ...T>
        std::vector<output_label_type> process_batch (
            const iterable_type& data,
            size_t batch_size,
            T&& ...args
        );
        /*!
            requires
                - batch_size > 0
                - data must have a .begin() and .end() that supply iterators over a
                  sequence of input_type elements.  E.g. data could have a type of
                  std::vector<input_type>
            ensures
                - This function is just like (*this)(data,batch_size), i.e. it runs a
                  bunch of objects through the network and returns the outputs.  But we
                  additionally pass the given args to loss_details().to_label() as the 4th
                  argument (or more, depending on how many things are in args) when
                  converting the network output to output_label_types.  This is useful,
                  for instance, with loss layers like loss_mmod_ which has an optional
                  adjust_threshold argument to to_label() that adjusts the detection
                  threshold.  Therefore, for such networks you could call them like:
                    net.process_batch(std::vector<image_type>({some_image, another_image}), 128, -0.5),
                  and -0.5 would be passed as the adjust_threshold argument of to_label().
        !*/

        // -------------
        template <typename label_iterator>
        double compute_loss (
            const tensor& x,
            label_iterator lbegin
        );
        /*!
            requires
                - sample_expansion_factor() != 0
                  (i.e. to_tensor() must have been called to set sample_expansion_factor()
                  to something non-zero.)
                - x.num_samples()%sample_expansion_factor() == 0
                - x.num_samples() > 0
                - lbegin == iterator pointing to the start of a range of
                  x.num_samples()/sample_expansion_factor() training_label_type elements.
            ensures
                - runs x through the network, compares the output to the expected output
                  pointed to by lbegin, and returns the resulting loss.
                - for all valid k:
                    - the expected label of the kth sample in x is *(lbegin+k/sample_expansion_factor()).
                - This function does not update the network parameters.
                - For sub-layers that are immediate inputs into the loss layer, we also populate the
                  sub-layer's get_gradient_input() tensor with the gradient of the loss with respect
                  to the sub-layer's output.
        !*/

        template <typename forward_iterator, typename label_iterator>
        double compute_loss (
            forward_iterator ibegin,
            forward_iterator iend,
            label_iterator lbegin
        );
        /*!
            requires
                - [ibegin, iend) is an iterator range over input_type objects.
                - std::distance(ibegin,iend) > 0
                - lbegin == iterator pointing to the start of a range of
                  std::distance(ibegin,iend) training_label_type elements.
            ensures
                - runs [ibegin,iend) through the network, compares the output to the
                  expected output pointed to by lbegin, and returns the resulting loss.
                - for all valid k:
                    - the expected label of *(ibegin+k) is *(lbegin+k).
                - This function does not update the network parameters.
                - For sub-layers that are immediate inputs into the loss layer, we also populate the
                  sub-layer's get_gradient_input() tensor with the gradient of the loss with respect
                  to the sub-layer's output.
        !*/

        // -------------

        double compute_loss (
            const tensor& x
        );
        /*!
            requires
                - LOSS_DETAILS is an unsupervised loss.  i.e. training_label_type==no_label_type.
                - sample_expansion_factor() != 0
                  (i.e. to_tensor() must have been called to set sample_expansion_factor()
                  to something non-zero.)
                - x.num_samples()%sample_expansion_factor() == 0
                - x.num_samples() > 0
            ensures
                - runs x through the network and returns the resulting loss.
                - This function does not update the network parameters.
                - For sub-layers that are immediate inputs into the loss layer, we also populate the
                  sub-layer's get_gradient_input() tensor with the gradient of the loss with respect
                  to the sub-layer's output.
        !*/

        template <typename forward_iterator>
        double compute_loss (
            forward_iterator ibegin,
            forward_iterator iend
        );
        /*!
            requires
                - LOSS_DETAILS is an unsupervised loss.  i.e. training_label_type==no_label_type.
                - [ibegin, iend) is an iterator range over input_type objects.
                - std::distance(ibegin,iend) > 0
            ensures
                - runs [ibegin,iend) through the network and returns the resulting loss.
                - This function does not update the network parameters.
                - For sub-layers that are immediate inputs into the loss layer, we also populate the
                  sub-layer's get_gradient_input() tensor with the gradient of the loss with respect
                  to the sub-layer's output.
        !*/

        // -------------
        template <typename label_iterator>
        double compute_parameter_gradients (
            const tensor& x,
            label_iterator lbegin
        );
        /*!
            requires
                - sample_expansion_factor() != 0
                  (i.e. to_tensor() must have been called to set sample_expansion_factor()
                  to something non-zero.)
                - x.num_samples()%sample_expansion_factor() == 0
                - x.num_samples() > 0
                - lbegin == iterator pointing to the start of a range of
                  x.num_samples()/sample_expansion_factor() training_label_type elements.
            ensures
                - runs x through the network, compares the output to the expected output
                  pointed to by lbegin, and computes parameter and data gradients with
                  respect to the loss, via backpropagation.  Specifically, this function
                  updates get_final_data_gradient() and also, for each layer, the tensor
                  returned by get_parameter_gradient().
                - for all valid k:
                    - the expected label of the kth sample in x is *(lbegin+k/sample_expansion_factor()).
                - returns compute_loss(x,lbegin)
        !*/

        template <typename forward_iterator, typename label_iterator>
        double compute_parameter_gradients (
            forward_iterator ibegin,
            forward_iterator iend,
            label_iterator lbegin
        );
        /*!
            requires
                - [ibegin, iend) is an iterator range over input_type objects.
                - std::distance(ibegin,iend) > 0
                - lbegin == iterator pointing to the start of a range of
                  std::distance(ibegin,iend) training_label_type elements.
            ensures
                - runs [ibegin,iend) through the network, compares the output to the
                  expected output pointed to by lbegin, and computes parameter and data
                  gradients with respect to the loss, via backpropagation.  Specifically,
                  this function updates get_final_data_gradient() and also, for each layer,
                  the tensor returned by get_parameter_gradient().
                - for all valid k:
                    - the expected label of *(ibegin+k) is *(lbegin+k).
                - returns compute_loss(ibegin,iend,lbegin)
        !*/

        double compute_parameter_gradients (
            const tensor& x
        );
        /*!
            requires
                - LOSS_DETAILS is an unsupervised loss.  i.e. training_label_type==no_label_type.
                - sample_expansion_factor() != 0
                  (i.e. to_tensor() must have been called to set sample_expansion_factor()
                  to something non-zero.)
                - x.num_samples()%sample_expansion_factor() == 0
                - x.num_samples() > 0
            ensures
                - runs x through the network and computes parameter and data gradients with
                  respect to the loss, via backpropagation.  Specifically, this function
                  updates get_final_data_gradient() and also, for each layer, the tensor
                  returned by get_parameter_gradient().
                - returns compute_loss(x)
        !*/

        template <typename forward_iterator>
        double compute_parameter_gradients (
            forward_iterator ibegin,
            forward_iterator iend
        );
        /*!
            requires
                - LOSS_DETAILS is an unsupervised loss.  i.e. training_label_type==no_label_type.
                - [ibegin, iend) is an iterator range over input_type objects.
                - std::distance(ibegin,iend) > 0
            ensures
                - runs [ibegin,iend) through the network and computes parameter and data
                  gradients with respect to the loss, via backpropagation.  Specifically,
                  this function updates get_final_data_gradient() and also, for each layer,
                  the tensor returned by get_parameter_gradient().
                - returns compute_loss(ibegin,iend)
        !*/

        template <typename solver_type>
        void update_parameters (
            sstack<solver_type> solvers,
            double learning_rate
        );
        /*!
            requires
                - solver_type is an implementation of the EXAMPLE_SOLVER interface defined
                  in solvers_abstract.h
                - compute_parameter_gradients() has been called.
                - The given solvers have only ever been used with this network.  That
                  is, if you want to call update_parameters() on some other neural network
                  object then you must NOT reuse the same solvers object.
                - solvers.size() >= num_computational_layers
                - 0 < learning_rate <= 1
            ensures
                - Updates all the parameters in the network.  In particular, we pass each
                  layer's parameter gradient (i.e. the tensor returned by the layer's
                  get_parameter_gradient() member) through that layer's corresponding
                  solver object.  This produces a parameter delta vector which we add to
                  the layer's parameters.
                - The solvers use the given learning rate.
        !*/
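        /*!
            EXAMPLE (an illustrative sketch of one manual training step, which is what
            dnn_trainer automates; net, samples, and labels are hypothetical):
                std::vector<sgd> solvers(net_type::num_computational_layers);
                net.compute_parameter_gradients(samples.begin(), samples.end(), labels.begin());
                net.update_parameters(make_sstack(solvers), 0.01);
        !*/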
        template <typename solver_type>
        void update_parameters(std::vector<solver_type>& solvers, double learning_rate)
        { update_parameters(make_sstack(solvers), learning_rate); }
        /*!
            Convenience method for calling update_parameters()
        !*/

        void back_propagate_error(
            const tensor& x
        );
        /*!
            requires
                - forward(x) was called to forward propagate x through the network.
                  Moreover, this was the most recent call to forward() and x has not been
                  subsequently modified in any way.
                - subnet().get_gradient_input() has been set equal to the gradient of this network's
                  output with respect to the loss function (generally this will be done by calling
                  compute_loss()).
            ensures
                - Back propagates the error gradient, subnet().get_gradient_input(), through this
                  network and computes parameter and data gradients, via backpropagation.
                  Specifically, this function populates get_final_data_gradient() and also,
                  for each layer, the tensor returned by get_parameter_gradient().
                - All elements of #subnet().get_gradient_input() are set to 0.
                - have_same_dimensions(#get_final_data_gradient(), x) == true.
                - #get_final_data_gradient() contains the gradient of the network with
                  respect to x.
        !*/

        void back_propagate_error(
            const tensor& x,
            const tensor& gradient_input
        );
        /*!
            requires
                - forward(x) was called to forward propagate x through the network.
                  Moreover, this was the most recent call to forward() and x has not been
                  subsequently modified in any way.
                - have_same_dimensions(gradient_input, subnet().get_output()) == true
            ensures
                - This function is identical to the version of back_propagate_error()
                  defined immediately above except that it back-propagates gradient_input
                  through the network instead of subnet().get_gradient_input().  Therefore, this
                  version of back_propagate_error() is equivalent to performing:
                    subnet().get_gradient_input() = gradient_input;
                    back_propagate_error(x);
                  Except that calling back_propagate_error(x,gradient_input) avoids the
                  copy and is therefore slightly more efficient.
                - All elements of #subnet().get_gradient_input() are set to 0.
                - have_same_dimensions(#get_final_data_gradient(), x) == true.
                - #get_final_data_gradient() contains the gradient of the network with
                  respect to x.
        !*/

        const tensor& get_final_data_gradient(
        ) const;
        /*!
            ensures
                - if back_propagate_error() has been called to back-propagate a gradient
                  through this network then you can call get_final_data_gradient() to
                  obtain the last data gradient computed.  That is, this function returns
                  the gradient of the network with respect to its inputs.
                - Note that there is only one "final data gradient" for an entire network,
                  not one per layer, since there is only one input to the entire network.
        !*/


        // -------------

        void clean (
        );
        /*!
            ensures
                - Causes the network to forget about everything but its parameters.
                - invokes subnet().clean()
        !*/
    };

    template <typename T, typename U>
    std::ostream& operator<<(std::ostream& out, const add_loss_layer<T,U>& item);
    /*!
        prints the network architecture to the given output stream.
    !*/

    template <typename T, typename U>
    void serialize(const add_loss_layer<T,U>& item, std::ostream& out);
    template <typename T, typename U>
    void deserialize(add_loss_layer<T,U>& item, std::istream& in);
    /*!
        provides serialization support
    !*/
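    // A brief sketch (illustrative; net is hypothetical) of saving and loading a
    // network with the routines above.  Calling clean() first drops the cached
    // intermediate tensors so the file is smaller:
    //
    //    net.clean();
    //    serialize("net.dat") << net;
    //    ...
    //    deserialize("net.dat") >> net;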
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <typename ...T>
    decorator_repeat_group<T...> repeat_group (
        T&& ...args
    );
    /*!
        ensures
            - Decorates a group of variables.  This is essentially like std::make_tuple()
              except its only purpose is to group variables together so they can be passed
              to the repeat object's constructor.
    !*/

    template <
        size_t num,
        template<typename> class REPEATED_LAYER,
        typename SUBNET
        >
    class repeat
    {
        /*!
            REQUIREMENTS ON num
                - num > 0

            REQUIREMENTS ON REPEATED_LAYER
                - REPEATED_LAYER must be a template that stacks more layers onto a deep neural
                  network.  For example, if net_type were a network without a loss layer,
                  then it should be legal to create a deeper network with a type of
                  REPEATED_LAYER<net_type>.

            REQUIREMENTS ON SUBNET
                - One of the following must be true:
                    - SUBNET is an add_layer object.
                    - SUBNET is an add_tag_layer object.
                    - SUBNET is an add_skip_layer object.
                    - SUBNET is a repeat object.

            WHAT THIS OBJECT REPRESENTS
                This object adds more layers to a deep neural network.  In particular, it
                adds REPEATED_LAYER on top of SUBNET num times.  So for example, if num were 2 then
                repeat<2,REPEATED_LAYER,SUBNET> would create a network equivalent to
                REPEATED_LAYER<REPEATED_LAYER<SUBNET>>.

                Also, this object provides an interface identical to the one defined by the
                add_layer object except that we add the num_repetitions() and
                get_repeated_layer() methods.  These additions are shown below along with
                some additional explanatory comments.
        !*/
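        /*!
            EXAMPLE (illustrative; my_block is a hypothetical user-defined template)
                template <typename SUBNET> using my_block = relu<fc<42, SUBNET>>;
                using sketch_net_type = repeat<3, my_block, input<matrix<float>>>;
                // equivalent to my_block<my_block<my_block<input<matrix<float>>>>>
        !*/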
    public:

        typedef SUBNET subnet_type;
        typedef typename SUBNET::input_type input_type;
        const static size_t num_computational_layers = (REPEATED_LAYER<SUBNET>::num_computational_layers-SUBNET::num_computational_layers)*num + SUBNET::num_computational_layers;
        const static size_t num_layers = (REPEATED_LAYER<SUBNET>::num_layers-SUBNET::num_layers)*num + SUBNET::num_layers;
        typedef REPEATED_LAYER<an_unspecified_input_type> repeated_layer_type;

        template <typename T, typename ...U>
        repeat(
            T arg1,
            U ...args2
        );
        /*!
            ensures
                - arg1 is used to initialize the num_repetitions() copies of REPEATED_LAYER inside
                  this object.  That is, all the REPEATED_LAYER elements are initialized identically
                  by being given copies of arg1.
                - The rest of the arguments to the constructor, i.e. args2, are passed to
                  SUBNET's constructor.
        !*/

        template <typename ...T, typename ...U>
        repeat(
            decorator_repeat_group<T...>&& arg1,
            U ...args2
        );
        /*!
            ensures
                - arg1 is used to initialize the num_repetitions() copies of REPEATED_LAYER inside
                  this object.  That is, all the REPEATED_LAYER elements are initialized identically
                  by being given copies of an undecorated arg1.
                - The rest of the arguments to the constructor, i.e. args2, are passed to
                  SUBNET's constructor.
        !*/

        size_t num_repetitions (
        ) const;
        /*!
            ensures
                - returns num (i.e. the number of times REPEATED_LAYER was stacked on top of SUBNET)
        !*/

        const repeated_layer_type& get_repeated_layer (
            size_t i
        ) const;
        /*!
            requires
                - i < num_repetitions()
            ensures
                - returns a reference to the i-th instance of REPEATED_LAYER.  For example,
                  get_repeated_layer(0) returns the instance of REPEATED_LAYER that is on the top of
                  the network while get_repeated_layer(num_repetitions()-1) returns the
                  instance of REPEATED_LAYER that is stacked immediately on top of SUBNET.
        !*/

        repeated_layer_type& get_repeated_layer (
            size_t i
        );
        /*!
            requires
                - i < num_repetitions()
            ensures
                - returns a reference to the i-th instance of REPEATED_LAYER.  For example,
                  get_repeated_layer(0) returns the instance of REPEATED_LAYER that is on the top of
                  the network while get_repeated_layer(num_repetitions()-1) returns the
                  instance of REPEATED_LAYER that is stacked immediately on top of SUBNET.
        !*/

        const subnet_type& subnet(
        ) const;
        /*!
            ensures
                - returns the SUBNET base network that repeat sits on top of.  If you want
                  to access the REPEATED_LAYER components then you must use get_repeated_layer().
        !*/

        subnet_type& subnet(
        );
        /*!
            ensures
                - returns the SUBNET base network that repeat sits on top of.  If you want
                  to access the REPEATED_LAYER components then you must use get_repeated_layer().
        !*/
    };

    template < size_t num, template<typename> class T, typename U >
    std::ostream& operator<<(std::ostream& out, const repeat<num,T,U>& item);
    /*!
        prints the network architecture to the given output stream.
    !*/

    template < size_t num, template<typename> class T, typename U >
    void serialize(const repeat<num,T,U>& item, std::ostream& out);
    template < size_t num, template<typename> class T, typename U >
    void deserialize(repeat<num,T,U>& item, std::istream& in);
    /*!
        provides serialization support
    !*/

// ----------------------------------------------------------------------------------------

    template <
        unsigned long ID,
        typename SUBNET
        >
    class add_tag_layer
    {
        /*!
            REQUIREMENTS ON SUBNET
                - One of the following must be true:
                    - SUBNET implements the EXAMPLE_INPUT_LAYER interface defined in
                      input_abstract.h.
                    - SUBNET is an add_layer object.
                    - SUBNET is an add_tag_layer object.
                    - SUBNET is an add_skip_layer object.
                    - SUBNET is a repeat object.

            WHAT THIS OBJECT REPRESENTS
                This object adds a new layer to a deep neural network.  However, this layer
                simply performs the identity transform.  This means it is a no-op and its
                presence does not change the behavior of the network.  It exists solely to
                be used by add_skip_layer to reference a particular part of a network.

                Also, this object provides an interface identical to the one defined by the
                add_layer object.
        !*/
    };
    template <unsigned long ID, typename U>
    std::ostream& operator<<(std::ostream& out, const add_tag_layer<ID,U>& item);
    /*!
        prints the network architecture to the given output stream.
    !*/

    template <unsigned long ID, typename U>
    void serialize(const add_tag_layer<ID,U>& item, std::ostream& out);
    template <unsigned long ID, typename U>
    void deserialize(add_tag_layer<ID,U>& item, std::istream& in);
    /*!
        provides serialization support
    !*/

    template <typename SUBNET> using tag1 = add_tag_layer< 1, SUBNET>;
    template <typename SUBNET> using tag2 = add_tag_layer< 2, SUBNET>;
    template <typename SUBNET> using tag3 = add_tag_layer< 3, SUBNET>;
    template <typename SUBNET> using tag4 = add_tag_layer< 4, SUBNET>;
    template <typename SUBNET> using tag5 = add_tag_layer< 5, SUBNET>;
    template <typename SUBNET> using tag6 = add_tag_layer< 6, SUBNET>;
    template <typename SUBNET> using tag7 = add_tag_layer< 7, SUBNET>;
    template <typename SUBNET> using tag8 = add_tag_layer< 8, SUBNET>;
    template <typename SUBNET> using tag9 = add_tag_layer< 9, SUBNET>;
    template <typename SUBNET> using tag10 = add_tag_layer<10, SUBNET>;

    template <template<typename SUBNET> class tag>
    struct tag_id
    {
        /*!
            REQUIREMENTS ON tag
                Tag should be an add_tag_layer template such as tag1, tag2, etc.

            WHAT THIS OBJECT REPRESENTS
                This is a tool for finding the numeric ID of a tag layer.  For example,
                tag_id<tag3>::id == 3.
        !*/

        const static unsigned long id;
    };

// ----------------------------------------------------------------------------------------

    template <
        template<typename> class TAG_TYPE,
        typename SUBNET
        >
    class add_skip_layer
    {
        /*!
            REQUIREMENTS ON SUBNET
                - One of the following must be true:
                    - SUBNET is an add_layer object.
                    - SUBNET is an add_tag_layer object.
                    - SUBNET is an add_skip_layer object.
                    - SUBNET is a repeat object.

            WHAT THIS OBJECT REPRESENTS
                This object adds a new layer to a deep neural network which draws its
                inputs from layer<TAG_TYPE>(subnet()) and performs the identity transform.

                Also, this object provides an interface identical to the one defined by the
                add_layer object.
        !*/
    };

    template <template<typename> class T, typename U>
    std::ostream& operator<<(std::ostream& out, const add_skip_layer<T,U>& item);
    /*!
        prints the network architecture to the given output stream.
    !*/

    template <template<typename> class T, typename U>
    void serialize(const add_skip_layer<T,U>& item, std::ostream& out);
    template <template<typename> class T, typename U>
    void deserialize(add_skip_layer<T,U>& item, std::istream& in);
    /*!
        provides serialization support
    !*/

    template <typename SUBNET> using skip1  = add_skip_layer< tag1, SUBNET>;
    template <typename SUBNET> using skip2  = add_skip_layer< tag2, SUBNET>;
    template <typename SUBNET> using skip3  = add_skip_layer< tag3, SUBNET>;
    template <typename SUBNET> using skip4  = add_skip_layer< tag4, SUBNET>;
    template <typename SUBNET> using skip5  = add_skip_layer< tag5, SUBNET>;
    template <typename SUBNET> using skip6  = add_skip_layer< tag6, SUBNET>;
    template <typename SUBNET> using skip7  = add_skip_layer< tag7, SUBNET>;
    template <typename SUBNET> using skip8  = add_skip_layer< tag8, SUBNET>;
    template <typename SUBNET> using skip9  = add_skip_layer< tag9, SUBNET>;
    template <typename SUBNET> using skip10 = add_skip_layer<tag10, SUBNET>;
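    // For example (illustrative), in the network type
    //    fc<1, skip1<fc<5, tag1<input<matrix<float>>>>>>
    // the tag1 layer marks the raw input tensor and the skip1 layer's output is
    // exactly that tagged tensor, so the outer fc<1> layer sees the input data
    // directly and the inner fc<5> branch is bypassed.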
// ----------------------------------------------------------------------------------------

    template <
        unsigned int i,
        typename net_type
        >
    auto& layer (
        net_type& n
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - i < net_type::num_layers
        ensures
            - This function allows you to access any layer in a network by its layer index
              i.  Therefore, it will walk i steps down the network and return the layer
              object there.  Since networks can be big, the best way to find layer index
              numbers is to print a network to the screen since the print out will include
              indexes for each layer.
            - In general, this function chains together i calls to n.subnet() and returns
              the result.  So for example:
                - if (i == 0)
                    - returns n
                - else if (i == 1)
                    - returns n.subnet()
                - else if (i == 2)
                    - returns n.subnet().subnet()
                - else if (i == 3)
                    - returns n.subnet().subnet().subnet()
                - else
                    - etc.
              Except that when it hits a repeat layer it recurses into the repeated layers
              contained inside.  That is, if the layer index indicates a layer in a repeat
              object this function will make the appropriate call to get_repeated_layer()
              and do the right thing.
    !*/

    template <
        template<typename> class Match,
        typename net_type
        >
    auto& layer (
        net_type& n
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
        ensures
            - returns the first layer in n that is of type Match.  E.g. if net_type is
              fc<relu<fc<input<sample_type>>>> then calling layer<relu>(n) would return
              layer<1>(n), that is, a reference to the relu layer.
    !*/

    template <
        template<typename> class Match,
        unsigned int i,
        typename net_type
        >
    auto& layer (
        net_type& n
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
        ensures
            - returns layer<i>(layer<Match>(n))
    !*/
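    // For example (illustrative; net is a hypothetical network object):
    //    layer<2>(net).get_output();      // output tensor of the layer at index 2
    //    layer<tag1>(net).get_output();   // output tensor of the first tag1 layer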
// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename visitor
        >
    void visit_layer_parameters(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(size_t idx, tensor& t)
              or:
                v(tensor& t)
        ensures
            - Loops over all the computational layers (i.e. layers with parameters, as
              opposed to loss, tag, or input layers) in net and passes their parameters to
              v().  To be specific, this function essentially performs the following:

                size_t computational_layer_idx = 0;
                for (size_t i = 0; i < net_type::num_layers; ++i)
                {
                    if (layer<i>(net) is a computational layer)
                    {
                        v(computational_layer_idx, layer<i>(net).layer_details().get_layer_params());
                        ++computational_layer_idx;
                    }
                }
            - When v() is called, the first argument is always <
              net_type::num_computational_layers.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename visitor
        >
    void visit_layer_parameter_gradients(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(size_t idx, tensor& t)
              or:
                v(tensor& t)
        ensures
            - Loops over all the computational layers (i.e. layers with parameters, as
              opposed to loss, tag, or input layers) in net and passes their parameter
              gradients to v().  To be specific, this function essentially performs the
              following:

                size_t computational_layer_idx = 0;
                for (size_t i = 0; i < net_type::num_layers; ++i)
                {
                    if (layer<i>(net) is a computational layer)
                    {
                        v(computational_layer_idx, layer<i>(net).get_parameter_gradient());
                        ++computational_layer_idx;
                    }
                }
            - When v() is called, the first argument is always <
              net_type::num_computational_layers.
    !*/
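    // A hedged sketch (not a utility dlib provides): the parameter visitor makes
    // it easy to count a network's trainable parameters by summing the sizes of
    // the visited tensors.
    template <typename net_type>
    size_t count_parameters (
        net_type& net
    )
    {
        size_t num = 0;
        visit_layer_parameters(net, [&](size_t, tensor& t) { num += t.size(); });
        return num;
    }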
// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename visitor
        >
    void visit_layers(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(size_t idx, any_net_type& t)
              or:
                v(any_net_type& t)
              That is, it takes an optional size_t and then any of the network types such
              as add_layer, add_loss_layer, etc.
        ensures
            - Loops over all the layers in net and calls v() on them.  To be specific,
              this function essentially performs the following:

                for (size_t i = 0; i < net_type::num_layers; ++i)
                    v(i, layer<i>(net));
    !*/

    template <
        typename net_type,
        typename visitor
        >
    void visit_layers_backwards(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(size_t idx, any_net_type& t)
              or:
                v(any_net_type& t)
              That is, it takes an optional size_t and then any of the network types such
              as add_layer, add_loss_layer, etc.
        ensures
            - Loops over all the layers in net and calls v() on them.  The loop happens in
              the reverse order of visit_layers().  To be specific, this function
              essentially performs the following:

                for (size_t i = net_type::num_layers; i != 0; --i)
                    v(i-1, layer<i-1>(net));
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename net_type,
        typename visitor
        >
    void visit_computational_layers(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(size_t idx, any_computational_layer& t)
              or:
                v(any_computational_layer& t)
              That is, it takes an optional size_t and then any of the computational
              layers.  E.g. one of the layer types defined in dlib/dnn/layers_abstract.h
              like fc_ or con_.
        ensures
            - Loops over all the computational layers in net and calls v() on them.  To be
              specific, this function essentially performs the following:

                for (size_t i = 0; i < net_type::num_layers; ++i)
                    if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
                        v(i, layer<i>(net).layer_details());
    !*/

    template <
        size_t begin,
        size_t end,
        typename net_type,
        typename visitor
        >
    void visit_computational_layers_range(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(size_t idx, any_computational_layer& t)
              or:
                v(any_computational_layer& t)
              That is, it takes an optional size_t and then any of the computational
              layers.  E.g. one of the layer types defined in dlib/dnn/layers_abstract.h
              like fc_ or con_.
        ensures
            - Loops over all the computational layers in the range [begin,end) in net and
              calls v() on them.  To be specific, this function essentially performs the
              following:

                for (size_t i = begin; i < end; ++i)
                    if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
                        v(i, layer<i>(net).layer_details());
    !*/
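    // A hedged sketch combining visit_computational_layers() with the
    // set_learning_rate_multiplier() helper declared near the top of this file:
    // setting every multiplier to 0 effectively freezes the network's parameters,
    // e.g. before fine-tuning a newly attached head.
    template <typename net_type>
    void freeze_all_layers (
        net_type& net
    )
    {
        visit_computational_layers(net, [](auto& l) { set_learning_rate_multiplier(l, 0); });
    }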
// ----------------------------------------------------------------------------------------

    template <
        size_t begin,
        size_t end,
        typename net_type,
        typename visitor
        >
    void visit_layers_range(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(size_t idx, any_net_type& t)
              or:
                v(any_net_type& t)
              That is, it takes an optional size_t and then any of the network types such
              as add_layer, add_loss_layer, etc.
            - begin <= end <= net_type::num_layers
        ensures
            - Loops over the layers in the range [begin,end) in net and calls v() on them.
              To be specific, this function essentially performs the following:

                for (size_t i = begin; i < end; ++i)
                    v(i, layer<i>(net));
    !*/

    template <
        size_t begin,
        size_t end,
        typename net_type,
        typename visitor
        >
    void visit_layers_backwards_range(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(size_t idx, any_net_type& t)
              or:
                v(any_net_type& t)
              That is, it takes an optional size_t and then any of the network types such
              as add_layer, add_loss_layer, etc.
            - begin <= end <= net_type::num_layers
        ensures
            - Loops over the layers in the range [begin,end) in net and calls v() on them.
              The loop happens in the reverse order of visit_layers_range().  To be
              specific, this function essentially performs the following:

                for (size_t i = end; i != begin; --i)
                    v(i-1, layer<i-1>(net));
    !*/

// ----------------------------------------------------------------------------------------

    template <
        unsigned long tag_id,
        typename net_type,
        typename visitor
        >
    void visit_layers_until_tag(
        net_type& net,
        visitor v
    );
    /*!
        requires
            - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
              add_tag_layer.
            - v is a function object with a signature equivalent to:
                v(any_net_type& t)
              That is, it must take any of the network types such as add_layer,
              add_loss_layer, etc.
        ensures
            - Loops over all the layers in net beginning with layer<0>(net) and going
              until a tag layer with an ID of tag_id is encountered.  To be specific, this
              function essentially performs the following:

                size_t i = 0;
                while(layer<i>(net) isn't an add_tag_layer with ID == tag_id) {
                    v(layer<i>(net));
                    ++i;
                }
                v(layer<i>(net)); // also visits the tag layer itself at the very end.
    !*/

// ----------------------------------------------------------------------------------------

    struct layer_test_results
    {
        std::string log;
        bool was_good;

        operator bool() const { return was_good; }
    };

    inline std::ostream& operator<< (std::ostream& out, const layer_test_results& item)
    {
        out << item.log;
        return out;
    }

    template <
        typename layer_details_type
        >
    layer_test_results test_layer (
        layer_details_type l
    );
    /*!
        ensures
            - Checks if l correctly implements the EXAMPLE_COMPUTATIONAL_LAYER_ interface
              defined in layers_abstract.h.  Importantly, it computes numerical
              approximations to the gradients and compares them to the outputs of the
              layer.
            - The results of the testing are returned.  In particular, if the returned
              object is RESULT then we will have:
                - RESULT.was_good == false if and only if the layer failed the testing.
                - RESULT.log == a string describing why the testing failed if
                  was_good==false.
            - Note that this function is only capable of checking layers that take
              arbitrary subnetworks as input.  So if you have designed a layer that
              expects only a certain restricted type of subnetwork then you might get a
              compile or runtime error when you call this function.
    !*/
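    // A hedged sketch of how test_layer() is typically used while developing a new
    // layer.  my_new_layer_ is a hypothetical type implementing the
    // EXAMPLE_COMPUTATIONAL_LAYER_ interface from layers_abstract.h, and printing
    // assumes <iostream> has been included.
    inline void check_my_new_layer()
    {
        my_new_layer_ l;
        layer_test_results res = test_layer(l);
        if (!res)  // layer_test_results converts to bool: false means the test failed
            std::cout << "layer test failed:\n" << res.log << std::endl;
    }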
// ----------------------------------------------------------------------------------------

}

#endif // DLIB_DNn_CORE_ABSTRACT_H_