1 // Copyright (C) 2015  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 #undef DLIB_DNn_CORE_ABSTRACT_H_
4 #ifdef DLIB_DNn_CORE_ABSTRACT_H_
5 
6 #include "../cuda/tensor_abstract.h"
7 #include <memory>
8 #include <type_traits>
9 #include <tuple>
10 #include <vector>
11 #include "../rand.h"
12 
13 
14 namespace dlib
15 {
16 
17 // ----------------------------------------------------------------------------------------
18 
19     template <
20         typename... T
21         >
22     auto tuple_tail(
23         const std::tuple<T...>& item
24     );
25     /*!
26         ensures
27             - returns a tuple that contains everything in item except for tuple_head(item).
28               The items will be in the same order as they are in item, just without
29               tuple_head(item).
30             - This function will correctly handle nested tuples.
31     !*/
32 
33     template <typename... T>
34     auto tuple_head (
35         const std::tuple<T...>& item
36     );
37     /*!
38         ensures
39             - returns a copy of the first thing in the tuple that isn't a std::tuple.
40               Essentially, this function calls std::get<0>() recursively on item until
41               a non-std::tuple object is found.
42     !*/
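
    // The following short sketch (illustrative only, not part of dlib's API) shows what the
    // contracts above imply for a flat tuple.  The names used here are hypothetical.
    inline void example_tuple_head_tail()
    {
        auto item = std::make_tuple(1, 2.5, 'c');
        auto h = tuple_head(item);   // h == 1, the first element that isn't a std::tuple
        auto t = tuple_tail(item);   // t holds everything after the head, i.e. (2.5, 'c')
        (void)h; (void)t;            // silence unused variable warnings
    }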
43 
44 // ----------------------------------------------------------------------------------------
45 
46     template <typename T>
47     double get_learning_rate_multiplier(
48         const T& obj
49     );
50     /*!
51         ensures
52             - if (obj has a get_learning_rate_multiplier() member function) then
53                 - returns obj.get_learning_rate_multiplier()
54             - else
55                 - returns 1
56     !*/
57 
58     template <typename T>
59     void set_learning_rate_multiplier(
60         T& obj,
61         double learning_rate_multiplier
62     );
63     /*!
64         requires
65             - learning_rate_multiplier >= 0
66         ensures
67             - if (obj has a set_learning_rate_multiplier() member function) then
68                 - calls obj.set_learning_rate_multiplier(learning_rate_multiplier)
69             - else
70                 - does nothing
71     !*/
72 
73 // ----------------------------------------------------------------------------------------
74 
75     template <typename T>
76     double get_bias_learning_rate_multiplier(
77         const T& obj
78     );
79     /*!
80         ensures
81             - if (obj has a get_bias_learning_rate_multiplier() member function) then
82                 - returns obj.get_bias_learning_rate_multiplier()
83             - else
84                 - returns 1
85     !*/
86 
87     template <typename T>
88     void set_bias_learning_rate_multiplier(
89         T& obj,
90         double bias_learning_rate_multiplier
91     );
92     /*!
93         requires
94             - bias_learning_rate_multiplier >= 0
95         ensures
96             - if (obj has a set_bias_learning_rate_multiplier() member function) then
97                 - calls obj.set_bias_learning_rate_multiplier(bias_learning_rate_multiplier)
98             - else
99                 - does nothing
100     !*/
101 
102 // ----------------------------------------------------------------------------------------
103 
104     template <typename T>
105     double get_weight_decay_multiplier(
106         const T& obj
107     );
108     /*!
109         ensures
110             - if (obj has a get_weight_decay_multiplier() member function) then
111                 - returns obj.get_weight_decay_multiplier()
112             - else
113                 - returns 1
114     !*/
115 
116     template <typename T>
117     void set_weight_decay_multiplier(
118         T& obj,
119         double weight_decay_multiplier
120     );
121     /*!
122         requires
123             - weight_decay_multiplier >= 0
124         ensures
125             - if (obj has a set_weight_decay_multiplier() member function) then
126                 - calls obj.set_weight_decay_multiplier(weight_decay_multiplier)
127             - else
128                 - does nothing
129     !*/
130 
131 // ----------------------------------------------------------------------------------------
132 
133     template <typename T>
134     double get_bias_weight_decay_multiplier(
135         const T& obj
136     );
137     /*!
138         ensures
139             - if (obj has a get_bias_weight_decay_multiplier() member function) then
140                 - returns obj.get_bias_weight_decay_multiplier()
141             - else
142                 - returns 1
143     !*/
144 
145     template <typename T>
146     void set_bias_weight_decay_multiplier(
147         T& obj,
148         double bias_weight_decay_multiplier
149     );
150     /*!
        requires
152             - bias_weight_decay_multiplier >= 0
153         ensures
154             - if (obj has a set_bias_weight_decay_multiplier() member function) then
155                 - calls obj.set_bias_weight_decay_multiplier(bias_weight_decay_multiplier)
156             - else
157                 - does nothing
158     !*/
159 
160 // ----------------------------------------------------------------------------------------
161 
162     template <typename T>
163     void disable_bias(
164         T& obj
165     );
166     /*!
167         ensures
168             - if (obj has a disable_bias() member function) then
169                 - calls obj.disable_bias()
170             - else
171                 - does nothing
172     !*/
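
    // Illustrative sketch (not part of dlib's API) of how the helpers above dispatch on the
    // presence of the optional member functions.  my_custom_layer_ and plain_layer_ are
    // hypothetical layer details types used only for this example; the same pattern applies
    // to the bias and weight decay variants and to disable_bias().
    struct my_custom_layer_
    {
        double get_learning_rate_multiplier() const { return lr_mult; }
        void   set_learning_rate_multiplier(double v) { lr_mult = v; }
        double lr_mult = 1;
    };
    struct plain_layer_ {};  // defines none of the optional member functions

    inline void example_multiplier_helpers()
    {
        my_custom_layer_ a;
        set_learning_rate_multiplier(a, 0.1);          // forwards to a.set_learning_rate_multiplier(0.1)
        double m1 = get_learning_rate_multiplier(a);   // == 0.1

        plain_layer_ b;
        set_learning_rate_multiplier(b, 0.1);          // does nothing, b has no such member
        double m2 = get_learning_rate_multiplier(b);   // == 1, the documented default
        disable_bias(b);                               // also a no-op for this type
        (void)m1; (void)m2;
    }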
173 
174 // ----------------------------------------------------------------------------------------
175 
176     bool dnn_prefer_fastest_algorithms(
177     );
178     /*!
179         ensures
            - returns true if dlib should prefer fast algorithms over algorithms that use
              less RAM, and returns false otherwise.
            - On program startup this function returns true by default.
183     !*/
184 
185     void set_dnn_prefer_fastest_algorithms(
186     );
187     /*!
188         ensures
189             - #dnn_prefer_fastest_algorithms() == true
190     !*/
191 
192     void set_dnn_prefer_smallest_algorithms(
193     );
194     /*!
195         ensures
196             - #dnn_prefer_fastest_algorithms() == false
197     !*/
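
    // Illustrative sketch: selecting the speed/memory tradeoff before constructing any networks.
    inline void example_algorithm_preference()
    {
        set_dnn_prefer_smallest_algorithms();               // favor lower RAM usage
        bool prefer_fast = dnn_prefer_fastest_algorithms(); // now false
        set_dnn_prefer_fastest_algorithms();                // restore the startup default
        (void)prefer_fast;
    }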
198 
199 // ----------------------------------------------------------------------------------------
200 
201     template <
202         typename T
203         >
204     class sstack
205     {
206         /*!
207             WHAT THIS OBJECT REPRESENTS
208                 This is a basic stack of T objects.  It contains no data itself but simply
                points to a memory range of T objects and allows you to access that block of
210                 T objects as a stack.
211         !*/
212 
213     public:
214         typedef T value_type;
215 
216         sstack() = delete;
217 
218         sstack (
219             T* data,
220             size_t s
221         );
222         /*!
223             ensures
224                 - #size() == s
225                 - #top() == *data
226                 - #pop(i).top() == data[i]
227         !*/
228 
229         const T& top(
230         ) const;
231         /*!
232             requires
233                 - size() != 0
234             ensures
235                 - returns the top element of the stack.
236         !*/
237 
238         T& top(
239         );
240         /*!
241             requires
242                 - size() != 0
243             ensures
244                 - returns the top element of the stack.
245         !*/
246 
247         size_t size(
248         ) const;
249         /*!
250             ensures
251                 - returns the number of elements in this stack.
252         !*/
253 
254         sstack pop(
255             size_t num = 1
256         );
257         /*!
258             requires
259                 - num <= size()
260             ensures
261                 - returns a reference to the sub-stack S such that:
262                     - S.size() == size()-num.
263                     - S.top() is num elements down the stack.
264         !*/
265     };
266 
267     template <
268         typename T
269         >
    sstack<T> make_sstack(
271         std::vector<T>& item
272     ) { return sstack<T>(item.data(), item.size()); }
273     /*!
274         ensures
275             - returns a sstack that sits on top of the given std::vector.
276     !*/
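
    // Illustrative sketch of using an sstack as a non-owning stack view over a std::vector,
    // following the contracts above.
    inline void example_sstack()
    {
        std::vector<int> data = {10, 20, 30, 40};
        auto s = make_sstack(data);    // s.size() == 4, s.top() == data[0]
        int t = s.top();               // == 10
        auto rest = s.pop(2);          // rest.size() == 2, rest.top() == data[2] == 30
        (void)t; (void)rest;
    }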
277 
278 // ----------------------------------------------------------------------------------------
279 
280     template <
281         typename LAYER_DETAILS,
282         typename SUBNET
283         >
284     class add_layer
285     {
286         /*!
287             REQUIREMENTS ON LAYER_DETAILS
288                 - Must be a type that implements the EXAMPLE_COMPUTATIONAL_LAYER_ interface
289                   defined in layers_abstract.h
290 
291             REQUIREMENTS ON SUBNET
292                 - One of the following must be true:
293                     - SUBNET implements the EXAMPLE_INPUT_LAYER interface defined in
294                       input_abstract.h.
295                     - SUBNET is an add_layer object.
296                     - SUBNET is an add_tag_layer object.
297                     - SUBNET is an add_skip_layer object.
298                     - SUBNET is a repeat object.
299 
300             WHAT THIS OBJECT REPRESENTS
301                 This object represents a deep neural network.  In particular, it is a tool
302                 for adding another layer on top of the neural network of type SUBNET, which
303                 is specified as a template argument.  The specific layer added is defined
                by the LAYER_DETAILS template argument.  A brief usage sketch appears
                after this class and its related functions.
305         !*/
306 
307     public:
308         typedef LAYER_DETAILS layer_details_type;
309         typedef SUBNET subnet_type;
310         typedef typename subnet_type::input_type input_type;
311         // num_computational_layers will always give the number of layers in the network
312         // that transform tensors (i.e. layers defined by something that implements the
313         // EXAMPLE_COMPUTATIONAL_LAYER_ interface).  This is all the layers except for
314         // loss, tag, and skip layers.
315         const static size_t num_computational_layers = subnet_type::num_computational_layers + 1;
316         // num_layers counts all the layers in the network regardless of their type.
317         const static size_t num_layers = subnet_type::num_layers + 1;
318 
319         add_layer(
320         );
321         /*!
322             ensures
323                 - default constructs all the layers in this network.
324                 - #sample_expansion_factor() == 0
325         !*/
326 
327         add_layer(const add_layer&) = default;
328         add_layer(add_layer&&) = default;
329         add_layer& operator=(add_layer&&) = default;
330         add_layer& operator=(const add_layer&) = default;
331         /*!
332             ensures
333                 - this object is copyable and movable.
334         !*/
335 
336         template <typename T, typename U>
337         add_layer(
338             const add_layer<T,U>& item
339         );
340         /*!
341             ensures
342                 - This constructor allows you to copy neural network objects from one to
343                   another as long as their corresponding layers can be constructed from
344                   each other.
345                 - #layer_details() == layer_details_type(item.layer_details())
346                 - #subnet()        == subnet_type(item.subnet())
347                 - #sample_expansion_factor() == item.sample_expansion_factor()
348         !*/
349 
350         template <typename ...T, typename LD, typename ...U>
351         add_layer(
352             const std::tuple<LD,U...>& layer_det,
353             T&& ...args
354         );
355         /*!
356             ensures
357                 - #layer_details() == layer_details_type(tuple_head(layer_det))
358                 - #subnet()        == subnet_type(tuple_tail(layer_det),args)
359                 - #sample_expansion_factor() == 0
360         !*/
361 
362         template <typename ...T>
363         add_layer(
364             const layer_details_type& layer_det,
365             T&& ...args
366         );
367         /*!
368             ensures
369                 - #layer_details() == layer_details_type(layer_det)
370                 - #subnet()        == subnet_type(args)
371                 - #sample_expansion_factor() == 0
372         !*/
373 
374         template <typename ...T>
375         add_layer(
376             T&& ...args
377         );
378         /*!
379             ensures
380                 - This version of the constructor is only called if layer_details_type
381                   can't be constructed from the first thing in args.  In this case, the
382                   args are simply passed on to the sub layers in their entirety.
383                 - #layer_details() == layer_details_type()
384                 - #subnet()        == subnet_type(args)
385                 - #sample_expansion_factor() == 0
386         !*/
387 
388         template <typename ...T>
389         add_layer(
390             layer_details_type&& layer_det,
391             T&& ...args
392         );
393         /*!
394             ensures
395                 - #layer_details() == layer_det
396                 - #subnet()        == subnet_type(args)
397                 - #sample_expansion_factor() == 0
398         !*/
399 
400         template <typename forward_iterator>
401         void to_tensor (
402             forward_iterator ibegin,
403             forward_iterator iend,
404             resizable_tensor& data
405         ) const;
406         /*!
407             requires
408                 - [ibegin, iend) is an iterator range over input_type objects.
409                 - std::distance(ibegin,iend) > 0
410             ensures
411                 - Converts the iterator range into a tensor and stores it into #data.
412                 - #data.num_samples()%distance(ibegin,iend) == 0.
413                 - #sample_expansion_factor() == #data.num_samples()/distance(ibegin,iend).
414                 - #sample_expansion_factor() > 0
415                 - The data in the ith sample of #data corresponds to the input_type object
416                   *(ibegin+i/#sample_expansion_factor()).
417                 - Invokes data.async_copy_to_device() so that the data begins transferring
418                   to the GPU device, if present.
419                 - This function is implemented by calling the to_tensor() routine defined
420                   at the input layer of this network.
421         !*/
422 
423         unsigned int sample_expansion_factor (
424         ) const;
425         /*!
426             ensures
427                 - When to_tensor() is invoked on this network's input layer it converts N
428                   input objects into M samples, all stored inside a resizable_tensor.  It
429                   is always the case that M is some integer multiple of N.
430                   sample_expansion_factor() returns the value of this multiplier.  To be
431                   very specific, it is always true that M==I*N where I is some integer.
432                   This integer I is what is returned by sample_expansion_factor().
433         !*/
434 
435         const subnet_type& subnet(
436         ) const;
437         /*!
438             ensures
439                 - returns the immediate subnetwork of *this network.
440         !*/
441 
442         subnet_type& subnet(
443         );
444         /*!
445             ensures
446                 - returns the immediate subnetwork of *this network.
447         !*/
448 
449         const layer_details_type& layer_details(
450         ) const;
451         /*!
452             ensures
453                 - returns the layer_details_type instance that defines the behavior of the
454                   layer at the top of this network.  I.e. returns the layer details that
455                   defines the behavior of the layer nearest to the network output rather
456                   than the input layer.
457         !*/
458 
459         layer_details_type& layer_details(
460         );
461         /*!
462             ensures
463                 - returns the layer_details_type instance that defines the behavior of the
464                   layer at the top of this network.  I.e. returns the layer details that
465                   defines the behavior of the layer nearest to the network output rather
466                   than the input layer.
467         !*/
468 
469         template <typename forward_iterator>
470         const tensor& operator() (
471             forward_iterator ibegin,
472             forward_iterator iend
473         );
474         /*!
475             requires
476                 - [ibegin, iend) is an iterator range over input_type objects.
477                 - std::distance(ibegin,iend) > 0
478             ensures
479                 - runs [ibegin,iend) through the network and returns the results.
480                   In particular, this function performs:
481                     to_tensor(ibegin,iend,temp_tensor);
482                     return forward(temp_tensor);
483                 - The return value from this function is also available in #get_output().
484                   i.e. this function returns #get_output().
485                 - have_same_dimensions(#get_gradient_input(), #get_output()) == true.
486                 - All elements of #get_gradient_input() are set to 0.
487                   i.e. calling this function clears out #get_gradient_input() and ensures
488                   it has the same dimensions as the most recent output.
489         !*/
490 
491         const tensor& operator() (
492             const input_type& x
493         );
494         /*!
495             ensures
496                 - runs a single x through the network and returns the output.
497                   I.e. returns (*this)(&x, &x+1);
498         !*/
499 
500         const tensor& forward(
501             const tensor& x
502         );
503         /*!
504             requires
505                 - sample_expansion_factor() != 0
506                   (i.e. to_tensor() must have been called to set sample_expansion_factor()
507                   to something non-zero.)
508                 - x.num_samples()%sample_expansion_factor() == 0
509                 - x.num_samples() > 0
510             ensures
511                 - Runs x through the network and returns the results.  In particular, this
512                   function performs the equivalent of:
513                     subnet().forward(x);
514                     if (this is the first time forward() has been called) then
515                         layer_details().setup(subnet());
516                     layer_details().forward(subnet(), get_output());
517                 - The return value from this function is also available in #get_output().
518                   i.e. this function returns #get_output().
519                 - have_same_dimensions(#get_gradient_input(), #get_output()) == true
520                 - All elements of #get_gradient_input() are set to 0.
521                   i.e. calling this function clears out #get_gradient_input() and ensures
522                   it has the same dimensions as the most recent output.
523         !*/
524 
525         const tensor& get_output(
526         ) const;
527         /*!
528             ensures
529                 - returns the output for the last tensor that was run through the network.
530                   If nothing has been run through the network yet then returns an empty
531                   tensor.
532         !*/
533 
534         tensor& get_gradient_input(
535         );
536         /*!
537             ensures
538                 - returns the error gradient for this network.  That is, this is the error
539                   gradient that this network will use to compute parameter gradients when
540                   back_propagate_error() is called.  Therefore, when performing back
541                   propagation, layers that sit on top of this network layer write their
542                   back-propagated error gradients into get_gradient_input().  Or to put it
543                   another way, during back-propagation, layers take the contents of their
544                   get_gradient_input() and back-propagate it through themselves and store
545                   the result into their subnetwork's get_gradient_input().
546 
547                   This means you should consider get_gradient_input() as an input to the
548                   back_propagate_error() method.
549         !*/
550 
551         const tensor& get_final_data_gradient(
552         ) const;
553         /*!
554             ensures
555                 - if back_propagate_error() has been called to back-propagate a gradient
556                   through this network then you can call get_final_data_gradient() to
557                   obtain the last data gradient computed.  That is, this function returns
558                   the gradient of the network with respect to its inputs.
559                 - Note that there is only one "final data gradient" for an entire network,
560                   not one per layer, since there is only one input to the entire network.
561         !*/
562 
563         const tensor& get_parameter_gradient(
564         ) const;
565         /*!
566             ensures
567                 - if back_propagate_error() has been called then you can call
568                   get_parameter_gradient() to find the gradient of this layer's parameters.
569                   When we update the parameters by calling update_parameters(), it will use
570                   the gradient in get_parameter_gradient() to perform the update.
571                   Therefore, you should consider get_parameter_gradient() as an input to
572                   update_parameters().
573         !*/
574 
575         tensor& get_parameter_gradient (
576         );
577         /*!
578             ensures
579                 - returns a non-const reference to the tensor returned by the above
580                   get_parameter_gradient() method.  You could use this method to modify the
581                   parameter gradient in some way before invoking update_parameters().
582         !*/
583 
584         void back_propagate_error(
585             const tensor& x
586         );
587         /*!
588             requires
                - forward(x) was called to forward propagate x through the network.
590                   Moreover, this was the most recent call to forward() and x has not been
591                   subsequently modified in any way.
592                 - get_gradient_input() has been set equal to the gradient of this network's
593                   output with respect to some loss function.
594             ensures
595                 - Back propagates the error gradient, get_gradient_input(), through this
596                   network and computes parameter and data gradients, via backpropagation.
597                   Specifically, this function populates get_final_data_gradient() and also,
598                   for each layer, the tensor returned by get_parameter_gradient().
599                 - All elements of #get_gradient_input() are set to 0.
600                 - have_same_dimensions(#get_final_data_gradient(), x) == true.
601                 - have_same_dimensions(#get_parameter_gradient(), layer_details().get_layer_params()) == true.
602                 - #get_final_data_gradient() contains the gradient of the network with
603                   respect to x.
604         !*/
605 
606         void back_propagate_error(
607             const tensor& x,
608             const tensor& gradient_input
609         );
610         /*!
611             requires
                - forward(x) was called to forward propagate x through the network.
613                   Moreover, this was the most recent call to forward() and x has not been
614                   subsequently modified in any way.
615                 - have_same_dimensions(gradient_input, get_output()) == true
616             ensures
617                 - This function is identical to the version of back_propagate_error()
618                   defined immediately above except that it back-propagates gradient_input
619                   through the network instead of get_gradient_input().  Therefore, this
620                   version of back_propagate_error() is equivalent to performing:
621                     get_gradient_input() = gradient_input;
622                     back_propagate_error(x);
623                   Except that calling back_propagate_error(x,gradient_input) avoids the
624                   copy and is therefore slightly more efficient.
625                 - All elements of #get_gradient_input() are set to 0.
626                 - have_same_dimensions(#get_final_data_gradient(), x) == true.
627                 - have_same_dimensions(#get_parameter_gradient(), layer_details().get_layer_params()) == true.
628                 - #get_final_data_gradient() contains the gradient of the network with
629                   respect to x.
630         !*/
631 
632         template <typename solver_type>
633         void update_parameters(
634             sstack<solver_type> solvers,
635             double learning_rate
636         );
637         /*!
638             requires
639                 - solver_type is an implementation of the EXAMPLE_SOLVER interface defined
640                   in solvers_abstract.h
641                 - back_propagate_error() has been called.
642                 - The given solvers have only ever been used with this network.  That is,
643                   if you want to call update_parameters() on some other neural network
644                   object then you must NOT reuse the same solvers object.
645                 - solvers.size() >= num_computational_layers
646                 - 0 < learning_rate <= 1
647             ensures
648                 - Updates all the parameters in the network.  In particular, we pass each
649                   layer's parameter gradient (i.e. the tensor returned by the layer's
650                   get_parameter_gradient() member) through that layer's corresponding
651                   solver object.  This produces a parameter delta vector which we add to
652                   the layer's parameters.
653                 - The solvers use the given learning rate.
654         !*/
655 
656         template <typename solver_type>
        void update_parameters(std::vector<solver_type>& solvers, double learning_rate)
658         { update_parameters(make_sstack(solvers), learning_rate); }
659         /*!
660             Convenience method for calling update_parameters()
661         !*/
662 
663         void clean(
664         );
665         /*!
666             ensures
667                 - Causes the network to forget about everything but its parameters.
668                   That is, for each layer we will have:
669                     - get_output().num_samples() == 0
670                     - get_gradient_input().num_samples() == 0
671                   However, running new input data though this network will still produce
672                   the same output it would have produced regardless of any calls to
673                   clean().  The purpose of clean() is to compact the network object prior
674                   to saving it to disk so that it takes up less space and the IO is
675                   quicker.
676                 - This also calls the .clean() method on any layer details objects that
677                   define a .clean() method.
678         !*/
679 
680     };
681 
682     template <typename T, typename U>
683     std::ostream& operator<<(std::ostream& out, const add_layer<T,U>& item);
684     /*!
685         prints the network architecture to the given output stream.
686     !*/
687 
688     template <typename T, typename U>
689     void serialize(const add_layer<T,U>& item, std::ostream& out);
690     template <typename T, typename U>
691     void deserialize(add_layer<T,U>& item, std::istream& in);
692     /*!
693         provides serialization support
694     !*/
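
    // The sketch below walks one forward/backward/update cycle through the add_layer
    // interface documented above.  It is illustrative only: it assumes the fc, relu, and
    // input layer templates, the sgd solver, and the dlib::matrix type documented in
    // layers_abstract.h, input_abstract.h, solvers_abstract.h, and matrix_abstract.h (in
    // user code you would simply #include <dlib/dnn.h>).  Normally a loss layer and the
    // dnn_trainer drive these calls for you.
    inline void example_add_layer_usage()
    {
        // A network with two computational layers: relu stacked on a fully connected layer.
        using net_type = relu<fc<10, input<matrix<float>>>>;
        net_type net;

        // Convert some input objects into a tensor and run them through the network.
        matrix<float> zero_img = zeros_matrix<float>(8,8);
        std::vector<matrix<float>> samples(4, zero_img);
        resizable_tensor x;
        net.to_tensor(samples.begin(), samples.end(), x);
        net.forward(x);

        // Backpropagate a loss gradient (all ones here, purely for illustration) and apply
        // one parameter update using one solver per computational layer.
        resizable_tensor grad;
        grad.copy_size(net.get_output());
        grad = 1;
        net.back_propagate_error(x, grad);
        std::vector<sgd> solvers(net_type::num_computational_layers);
        net.update_parameters(make_sstack(solvers), 0.01);
    }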
695 
696 // ----------------------------------------------------------------------------------------
697 // ----------------------------------------------------------------------------------------
698 // ----------------------------------------------------------------------------------------
699 
700     class no_label_type;
701 
702     template <
703         typename LOSS_DETAILS,
704         typename SUBNET
705         >
706     class add_loss_layer
707     {
708         /*!
709             REQUIREMENTS ON LOSS_DETAILS
710                 - Must be a type that implements the EXAMPLE_LOSS_LAYER_ interface defined
711                   in loss_abstract.h
712 
713             REQUIREMENTS ON SUBNET
714                 - One of the following must be true:
715                     - SUBNET is an add_layer object.
716                     - SUBNET is an add_tag_layer object.
717                     - SUBNET is an add_skip_layer object.
718                     - SUBNET is a repeat object.
719 
720             WHAT THIS OBJECT REPRESENTS
721                 This object represents a deep neural network.  In particular, it is a tool
722                 for adding a loss layer on top of the neural network of type SUBNET, which
723                 is specified as a template argument.  The specific layer added is defined
                by the LOSS_DETAILS template argument.  Importantly, a loss layer is the
                last layer in a deep neural network.  So once it is added you can't add
                any other layers of any type.  (A brief usage sketch appears after this
                class and its related functions.)
727         !*/
728 
729     public:
730         typedef LOSS_DETAILS loss_details_type;
731         typedef SUBNET subnet_type;
732         typedef typename subnet_type::input_type input_type;
733         const static size_t num_computational_layers = subnet_type::num_computational_layers;
734         const static size_t num_layers = subnet_type::num_layers + 1;
735         // If LOSS_DETAILS is an unsupervised loss then training_label_type==no_label_type.
736         // Otherwise it is defined as follows:
737         typedef typename LOSS_DETAILS::training_label_type training_label_type;
738         // Similarly, if LOSS_DETAILS doesn't provide any output conversion then
739         // output_label_type==no_label_type.
740         typedef typename LOSS_DETAILS::output_label_type output_label_type;
741 
742 
743 
744         add_loss_layer() = default;
745         /*!
746             ensures
747                 - default constructs all the layers in this network.
748         !*/
749 
750         add_loss_layer(const add_loss_layer&) = default;
751         add_loss_layer(add_loss_layer&&) = default;
752         add_loss_layer& operator=(add_loss_layer&&) = default;
753         add_loss_layer& operator=(const add_loss_layer&) = default;
754         /*!
755             ensures
756                 - this object is copyable and movable.
757         !*/
758 
759         template <typename T, typename U>
760         add_loss_layer(
761             const add_loss_layer<T,U>& item
762         );
763         /*!
764             ensures
765                 - This constructor allows you to copy neural network objects from one to
766                   another as long as their corresponding layers can be constructed from
767                   each other.
768                 - #loss_details() == loss_details_type(item.loss_details())
769                 - #subnet()       == subnet_type(item.subnet())
770         !*/
771 
772         template <typename ...T>
773         add_loss_layer(
774             const LOSS_DETAILS& layer_det,
775             T&& ...args
776         );
777         /*!
778             ensures
779                 - #loss_details() == loss_details_type(layer_det)
780                 - #subnet()       == subnet_type(args)
781         !*/
782 
783         template <typename ...T>
784         add_loss_layer(
785             LOSS_DETAILS&& layer_det,
786             T&& ...args
787         );
788         /*!
789             ensures
790                 - #loss_details() == loss_details_type(layer_det)
791                 - #subnet()       == subnet_type(args)
792         !*/
793 
794         template <typename ...T>
795         add_loss_layer(
796             T&& ...args
797         );
798         /*!
799             ensures
800                 - This version of the constructor is only called if loss_details_type can't
801                   be constructed from the first thing in args.  In this case, the args are
802                   simply passed on to the sub layers in their entirety.
803                 - #loss_details() == loss_details_type()
804                 - #subnet()       == subnet_type(args)
805         !*/
806 
807         const subnet_type& subnet(
808         ) const;
809         /*!
810             ensures
811                 - returns the immediate subnetwork of *this network.
812         !*/
813 
814         subnet_type& subnet(
815         );
816         /*!
817             ensures
818                 - returns the immediate subnetwork of *this network.
819         !*/
820 
821         const loss_details_type& loss_details(
822         ) const;
823         /*!
824             ensures
825                 - returns the loss_details_type instance that defines the behavior of the
826                   loss layer used by this network.
827         !*/
828 
829         loss_details_type& loss_details(
830         );
831         /*!
832             ensures
833                 - returns the loss_details_type instance that defines the behavior of the
834                   loss layer used by this network.
835         !*/
836 
837         template <typename forward_iterator>
838         void to_tensor (
839             forward_iterator ibegin,
840             forward_iterator iend,
841             resizable_tensor& data
842         ) const;
843         /*!
844             requires
845                 - [ibegin, iend) is an iterator range over input_type objects.
846                 - std::distance(ibegin,iend) > 0
847             ensures
848                 - Converts the iterator range into a tensor and stores it into #data.
849                 - #data.num_samples()%distance(ibegin,iend) == 0.
850                 - #sample_expansion_factor() == #data.num_samples()/distance(ibegin,iend).
851                 - #sample_expansion_factor() > 0
852                 - The data in the ith sample of #data corresponds to the input_type object
853                   *(ibegin+i/sample_expansion_factor()).
854                 - Invokes data.async_copy_to_device() so that the data begins transferring
855                   to the GPU device, if present.
856                 - This function is implemented by calling the to_tensor() routine defined
857                   at the input layer of this network.
858         !*/
859 
860         unsigned int sample_expansion_factor (
861         ) const;
862         /*!
863             ensures
864                 - When to_tensor() is invoked on this network's input layer it converts N
865                   input objects into M samples, all stored inside a resizable_tensor.  It
866                   is always the case that M is some integer multiple of N.
867                   sample_expansion_factor() returns the value of this multiplier.  To be
868                   very specific, it is always true that M==I*N where I is some integer.
869                   This integer I is what is returned by sample_expansion_factor().
870         !*/
871 
872     // -------------
873 
874         const tensor& forward(const tensor& x
875         );
876         /*!
877             requires
878                 - sample_expansion_factor() != 0
879                   (i.e. to_tensor() must have been called to set sample_expansion_factor()
880                   to something non-zero.)
881                 - x.num_samples()%sample_expansion_factor() == 0
882                 - x.num_samples() > 0
883             ensures
884                 - Runs x through the network and returns the results as a tensor.  In particular,
885                   this function just performs:
886                     return subnet().forward(x);
887                   So if you want to get the outputs as an output_label_type then call one of the
888                   methods below instead, like operator().
889                 - The return value from this function is also available in #subnet().get_output().
890                   i.e. this function returns #subnet().get_output().
891                 - have_same_dimensions(#subnet().get_gradient_input(), #subnet().get_output()) == true
892                 - All elements of #subnet().get_gradient_input() are set to 0.
893                   i.e. calling this function clears out #subnet().get_gradient_input() and ensures
894                   it has the same dimensions as the most recent output.
895         !*/
896 
897         template <typename output_iterator>
898         void operator() (
899             const tensor& x,
900             output_iterator obegin
901         );
902         /*!
903             requires
904                 - sample_expansion_factor() != 0
905                   (i.e. to_tensor() must have been called to set sample_expansion_factor()
906                   to something non-zero.)
907                 - x.num_samples()%sample_expansion_factor() == 0
908                 - x.num_samples() > 0
909                 - obegin == iterator pointing to the start of a range of
910                   x.num_samples()/sample_expansion_factor() output_label_type elements.
911             ensures
912                 - runs x through the network and writes the output to the range at obegin.
913                 - loss_details().to_label() is used to write the network output into
914                   obegin.
915         !*/
916 
917         template <typename forward_iterator, typename label_iterator>
918         void operator() (
919             forward_iterator ibegin,
920             forward_iterator iend,
921             label_iterator obegin
922         );
923         /*!
924             requires
925                 - [ibegin, iend) is an iterator range over input_type objects.
926                 - std::distance(ibegin,iend) > 0
927                 - obegin == iterator pointing to the start of a range of
928                   std::distance(ibegin,iend) output_label_type elements.
929             ensures
930                 - runs [ibegin,iend) through the network and writes the output to the range
931                   at obegin.
932                 - loss_details().to_label() is used to write the network output into
933                   obegin.
934         !*/
935 
936     // -------------
937 
938         const output_label_type& operator() (
939             const input_type& x
940         );
941         /*!
942             ensures
943                 - runs a single object, x, through the network and returns the output.
                - loss_details().to_label() is used to convert the network output into an
945                   output_label_type.
946         !*/
947 
948         template <typename iterable_type>
949         std::vector<output_label_type> operator() (
950             const iterable_type& data,
951             size_t batch_size = 128
952         );
953         /*!
954             requires
955                 - batch_size > 0
956                 - data must have a .begin() and .end() that supply iterators over a
957                   sequence of input_type elements.  E.g. data could have a type of
958                   std::vector<input_type>
959             ensures
960                 - runs all the objects in data through the network and returns their
961                   predicted labels.  This means this function returns a vector V such that:
962                     - V.size() == data.size()
963                     - for all valid i: V[i] == the predicted label of data[i].
964                 - Elements of data are run through the network in batches of batch_size
965                   items.  Using a batch_size > 1 can be faster because it better exploits
966                   the available hardware parallelism.
                - loss_details().to_label() is used to convert the network output into an
968                   output_label_type.
969         !*/
970 
971         template <typename ...T>
972         const output_label_type& process (
973             const input_type& x,
974             T&& ...args
975         );
976         /*!
977             ensures
978                 - This function is just like (*this)(x), i.e. it runs a single object, x,
979                   through the network and returns the output.  But we additionally pass the
980                   given args to loss_details().to_label() as the 4th argument (or more,
981                   depending on how many things are in args) when converting the network
982                   output to an output_label_type.  This is useful, for instance, with loss
983                   layers like loss_mmod_ which has an optional adjust_threshold argument to
984                   to_label() that adjusts the detection threshold.  Therefore, for such
985                   networks you could call them like: net.process(some_image, -0.5), and -0.5
                  would be passed to the adjust_threshold argument of to_label().
987         !*/
988 
989         template <typename iterable_type, typename ...T>
990         std::vector<output_label_type> process_batch (
991             const iterable_type& data,
992             size_t batch_size,
993             T&& ...args
994         );
995         /*!
996             requires
997                 - batch_size > 0
998                 - data must have a .begin() and .end() that supply iterators over a
999                   sequence of input_type elements.  E.g. data could have a type of
1000                   std::vector<input_type>
1001             ensures
1002                 - This function is just like (*this)(data,batch_size), i.e. it runs a
1003                   bunch of objects through the network and returns the outputs.  But we
1004                   additionally pass the given args to loss_details().to_label() as the 4th
1005                   argument (or more, depending on how many things are in args) when
1006                   converting the network output to output_label_types.  This is useful,
1007                   for instance, with loss layers like loss_mmod_ which has an optional
1008                   adjust_threshold argument to to_label() that adjusts the detection
1009                   threshold.  Therefore, for such networks you could call them like:
1010                   net.process_batch(std::vector<image_type>({some_image, another_image}), 128, -0.5),
                  and -0.5 would be passed to the adjust_threshold argument of to_label().
1012         !*/
1013 
1014     // -------------
1015 
1016         template <typename label_iterator>
1017         double compute_loss (
1018             const tensor& x,
1019             label_iterator lbegin
1020         );
1021         /*!
1022             requires
1023                 - sample_expansion_factor() != 0
1024                   (i.e. to_tensor() must have been called to set sample_expansion_factor()
1025                   to something non-zero.)
1026                 - x.num_samples()%sample_expansion_factor() == 0
1027                 - x.num_samples() > 0
1028                 - lbegin == iterator pointing to the start of a range of
1029                   x.num_samples()/sample_expansion_factor() training_label_type elements.
1030             ensures
1031                 - runs x through the network, compares the output to the expected output
1032                   pointed to by lbegin, and returns the resulting loss.
1033                 - for all valid k:
1034                     - the expected label of the kth sample in x is *(lbegin+k/sample_expansion_factor()).
1035                 - This function does not update the network parameters.
1036                 - For sub-layers that are immediate inputs into the loss layer, we also populate the
1037                   sub-layer's get_gradient_input() tensor with the gradient of the loss with respect
1038                   to the sub-layer's output.
1039         !*/
1040 
1041         template <typename forward_iterator, typename label_iterator>
1042         double compute_loss (
1043             forward_iterator ibegin,
1044             forward_iterator iend,
1045             label_iterator lbegin
1046         );
1047         /*!
1048             requires
1049                 - [ibegin, iend) is an iterator range over input_type objects.
1050                 - std::distance(ibegin,iend) > 0
1051                 - lbegin == iterator pointing to the start of a range of
1052                   std::distance(ibegin,iend) training_label_type elements.
1053             ensures
1054                 - runs [ibegin,iend) through the network, compares the output to the
1055                   expected output pointed to by lbegin, and returns the resulting loss.
1056                 - for all valid k:
1057                     - the expected label of *(ibegin+k) is *(lbegin+k).
1058                 - This function does not update the network parameters.
1059                 - For sub-layers that are immediate inputs into the loss layer, we also populate the
1060                   sub-layer's get_gradient_input() tensor with the gradient of the loss with respect
1061                   to the sub-layer's output.
1062         !*/
1063 
1064     // -------------
1065 
1066         double compute_loss (
1067             const tensor& x
1068         );
1069         /*!
1070             requires
1071                 - LOSS_DETAILS is an unsupervised loss.  i.e. training_label_type==no_label_type.
1072                 - sample_expansion_factor() != 0
1073                   (i.e. to_tensor() must have been called to set sample_expansion_factor()
1074                   to something non-zero.)
1075                 - x.num_samples()%sample_expansion_factor() == 0
1076                 - x.num_samples() > 0
1077             ensures
1078                 - runs x through the network and returns the resulting loss.
1079                 - This function does not update the network parameters.
1080                 - For sub-layers that are immediate inputs into the loss layer, we also populate the
1081                   sub-layer's get_gradient_input() tensor with the gradient of the loss with respect
1082                   to the sub-layer's output.
1083         !*/
1084 
1085         template <typename forward_iterator>
1086         double compute_loss (
1087             forward_iterator ibegin,
            forward_iterator iend
1089         );
1090         /*!
1091             requires
1092                 - LOSS_DETAILS is an unsupervised loss.  i.e. training_label_type==no_label_type.
1093                 - [ibegin, iend) is an iterator range over input_type objects.
1094                 - std::distance(ibegin,iend) > 0
1095             ensures
1096                 - runs [ibegin,iend) through the network and returns the resulting loss.
1097                 - This function does not update the network parameters.
1098                 - For sub-layers that are immediate inputs into the loss layer, we also populate the
1099                   sub-layer's get_gradient_input() tensor with the gradient of the loss with respect
1100                   to the sub-layer's output.
1101         !*/
1102 
1103     // -------------
1104 
1105         template <typename label_iterator>
1106         double compute_parameter_gradients (
1107             const tensor& x,
1108             label_iterator lbegin
1109         );
1110         /*!
1111             requires
1112                 - sample_expansion_factor() != 0
1113                   (i.e. to_tensor() must have been called to set sample_expansion_factor()
1114                   to something non-zero.)
1115                 - x.num_samples()%sample_expansion_factor() == 0
1116                 - x.num_samples() > 0
1117                 - lbegin == iterator pointing to the start of a range of
1118                   x.num_samples()/sample_expansion_factor() training_label_type elements.
1119             ensures
1120                 - runs x through the network, compares the output to the expected output
1121                   pointed to by lbegin, and computes parameter and data gradients with
1122                   respect to the loss, via backpropagation.  Specifically, this function
1123                   updates get_final_data_gradient() and also, for each layer, the tensor
1124                   returned by get_parameter_gradient().
1125                 - for all valid k:
1126                     - the expected label of the kth sample in x is *(lbegin+k/sample_expansion_factor()).
1127                 - returns compute_loss(x,lbegin)
1128         !*/
1129 
1130         template <typename forward_iterator, typename label_iterator>
1131         double compute_parameter_gradients (
1132             forward_iterator ibegin,
1133             forward_iterator iend,
1134             label_iterator lbegin
1135         );
1136         /*!
1137             requires
1138                 - [ibegin, iend) is an iterator range over input_type objects.
1139                 - std::distance(ibegin,iend) > 0
1140                 - lbegin == iterator pointing to the start of a range of
1141                   std::distance(ibegin,iend) training_label_type elements.
1142             ensures
1143                 - runs [ibegin,iend) through the network, compares the output to the
1144                   expected output pointed to by lbegin, and computes parameter and data
1145                   gradients with respect to the loss, via backpropagation.  Specifically,
1146                   this function updates get_final_data_gradient() and also, for each layer,
1147                   the tensor returned by get_parameter_gradient().
1148                 - for all valid k:
1149                     - the expected label of *(ibegin+k) is *(lbegin+k).
1150                 - returns compute_loss(ibegin,iend,lbegin)
1151         !*/
1152 
1153         double compute_parameter_gradients (
1154             const tensor& x
1155         );
1156         /*!
1157             requires
1158                 - LOSS_DETAILS is an unsupervised loss.  i.e. training_label_type==no_label_type.
1159                 - sample_expansion_factor() != 0
1160                   (i.e. to_tensor() must have been called to set sample_expansion_factor()
1161                   to something non-zero.)
1162                 - x.num_samples()%sample_expansion_factor() == 0
1163                 - x.num_samples() > 0
1164             ensures
1165                 - runs x through the network and computes parameter and data gradients with
1166                   respect to the loss, via backpropagation.  Specifically, this function
1167                   updates get_final_data_gradient() and also, for each layer, the tensor
1168                   returned by get_parameter_gradient().
1169                 - returns compute_loss(x)
1170         !*/
1171 
1172         template <typename forward_iterator>
1173         double compute_parameter_gradients (
1174             forward_iterator ibegin,
1175             forward_iterator iend
1176         );
1177         /*!
1178             requires
1179                 - LOSS_DETAILS is an unsupervised loss.  i.e. training_label_type==no_label_type.
1180                 - [ibegin, iend) is an iterator range over input_type objects.
1181                 - std::distance(ibegin,iend) > 0
1182             ensures
1183                 - runs [ibegin,iend) through the network and computes parameter and data
1184                   gradients with respect to the loss, via backpropagation.  Specifically,
1185                   this function updates get_final_data_gradient() and also, for each layer,
1186                   the tensor returned by get_parameter_gradient().
1187                 - returns compute_loss(ibegin,iend)
1188         !*/
1189 
1190         template <typename solver_type>
1191         void update_parameters (
1192             sstack<solver_type> solvers,
1193             double learning_rate
1194         );
1195         /*!
1196             requires
1197                 - solver_type is an implementation of the EXAMPLE_SOLVER interface defined
1198                   in solvers_abstract.h
1199                 - compute_parameter_gradients() has been called.
1200                 - The given solvers have only ever been used with this network.  That
1201                   is, if you want to call update_parameters() on some other neural network
1202                   object then you must NOT reuse the same solvers object.
1203                 - solvers.size() >= num_computational_layers
1204                 - 0 < learning_rate <= 1
1205             ensures
1206                 - Updates all the parameters in the network.  In particular, we pass each
1207                   layer's parameter gradient (i.e. the tensor returned by the layer's
1208                   get_parameter_gradient() member) through that layer's corresponding
1209                   solver object.  This produces a parameter delta vector which we add to
1210                   the layer's parameters.
1211                 - The solvers use the given learning rate.
1212         !*/
1213 
1214         template <typename solver_type>
        void update_parameters(std::vector<solver_type>& solvers, double learning_rate
1216         ) { update_parameters(make_sstack(solvers), learning_rate); }
1217         /*!
1218             Convenience method for calling update_parameters()
1219         !*/
1220 
1221         void back_propagate_error(
1222             const tensor& x
1223         );
1224         /*!
1225             requires
                - forward(x) was called to forward propagate x through the network.
1227                   Moreover, this was the most recent call to forward() and x has not been
1228                   subsequently modified in any way.
1229                 - subnet().get_gradient_input() has been set equal to the gradient of this network's
1230                   output with respect to the loss function (generally this will be done by calling
1231                   compute_loss()).
1232             ensures
1233                 - Back propagates the error gradient, subnet().get_gradient_input(), through this
1234                   network and computes parameter and data gradients, via backpropagation.
1235                   Specifically, this function populates get_final_data_gradient() and also,
1236                   for each layer, the tensor returned by get_parameter_gradient().
1237                 - All elements of #subnet().get_gradient_input() are set to 0.
1238                 - have_same_dimensions(#get_final_data_gradient(), x) == true.
1239                 - #get_final_data_gradient() contains the gradient of the network with
1240                   respect to x.
1241         !*/
1242 
1243         void back_propagate_error(
1244             const tensor& x,
1245             const tensor& gradient_input
1246         );
1247         /*!
1248             requires
                - forward(x) was called to forward propagate x through the network.
1250                   Moreover, this was the most recent call to forward() and x has not been
1251                   subsequently modified in any way.
1252                 - have_same_dimensions(gradient_input, subnet().get_output()) == true
1253             ensures
1254                 - This function is identical to the version of back_propagate_error()
1255                   defined immediately above except that it back-propagates gradient_input
1256                   through the network instead of subnet().get_gradient_input().  Therefore, this
1257                   version of back_propagate_error() is equivalent to performing:
1258                     subnet().get_gradient_input() = gradient_input;
1259                     back_propagate_error(x);
1260                   Except that calling back_propagate_error(x,gradient_input) avoids the
1261                   copy and is therefore slightly more efficient.
1262                 - All elements of #subnet().get_gradient_input() are set to 0.
1263                 - have_same_dimensions(#get_final_data_gradient(), x) == true.
1264                 - #get_final_data_gradient() contains the gradient of the network with
1265                   respect to x.
1266         !*/
1267 
1268         const tensor& get_final_data_gradient(
1269         ) const;
1270         /*!
1271             ensures
1272                 - if back_propagate_error() has been called to back-propagate a gradient
1273                   through this network then you can call get_final_data_gradient() to
1274                   obtain the last data gradient computed.  That is, this function returns
1275                   the gradient of the network with respect to its inputs.
1276                 - Note that there is only one "final data gradient" for an entire network,
1277                   not one per layer, since there is only one input to the entire network.
1278         !*/
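        /*
            For illustration only: a sketch of the flow described by back_propagate_error()
            and get_final_data_gradient().  It assumes x was produced by to_tensor(), that
            compute_loss() forward propagates x through the network, and that it fills
            subnet().get_gradient_input() with the loss gradient as noted above; labels is
            a hypothetical container of training labels.

                double loss = net.compute_loss(x, labels.begin());
                net.back_propagate_error(x);                       // parameter and data gradients
                const tensor& dx = net.get_final_data_gradient();  // gradient of the network w.r.t. x
        */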
1279 
1280 
1281     // -------------
1282 
1283         void clean (
1284         );
1285         /*!
1286             ensures
1287                 - Causes the network to forget about everything but its parameters.
1288                 - invokes subnet().clean()
1289         !*/
1290     };
1291 
1292     template <typename T, typename U>
1293     std::ostream& operator<<(std::ostream& out, const add_loss_layer<T,U>& item);
1294     /*!
1295         prints the network architecture to the given output stream.
1296     !*/
1297 
1298     template <typename T, typename U>
1299     void serialize(const add_loss_layer<T,U>& item, std::ostream& out);
1300     template <typename T, typename U>
1301     void deserialize(add_loss_layer<T,U>& item, std::istream& in);
1302     /*!
1303         provides serialization support
1304     !*/
1305 
1306 // ----------------------------------------------------------------------------------------
1307 // ----------------------------------------------------------------------------------------
1308 // ----------------------------------------------------------------------------------------
1309 
1310     template <typename ...T>
1311     decorator_repeat_group<T...> repeat_group (
1312         T&& ...args
1313     );
1314     /*!
1315         ensures
1316             - Decorates a group of variables.  This is essentially like std::make_tuple()
1317               except its only purpose is to group variables together so they can be passed
1318               to the repeat object's constructor.
1319     !*/
1320 
1321     template <
1322         size_t num,
1323         template<typename> class REPEATED_LAYER,
1324         typename SUBNET
1325         >
1326     class repeat
1327     {
1328         /*!
1329             REQUIREMENTS ON num
1330                 - num > 0
1331 
1332             REQUIREMENTS ON REPEATED_LAYER
1333                 - REPEATED_LAYER must be a template that stacks more layers onto a deep neural
1334                   network.  For example, if net_type were a network without a loss layer,
1335                   then it should be legal to create a deeper network with a type of
1336                   REPEATED_LAYER<net_type>.
1337 
1338             REQUIREMENTS ON SUBNET
1339                 - One of the following must be true:
1340                     - SUBNET is an add_layer object.
1341                     - SUBNET is an add_tag_layer object.
1342                     - SUBNET is an add_skip_layer object.
1343                     - SUBNET is a repeat object.
1344 
1345             WHAT THIS OBJECT REPRESENTS
1346                 This object adds more layers to a deep neural network.  In particular, it
1347                 adds REPEATED_LAYER on top of SUBNET num times.  So for example, if num were 2 then
1348                 repeat<2,REPEATED_LAYER,SUBNET> would create a network equivalent to REPEATED_LAYER<REPEATED_LAYER<SUBNET>>.
1349 
1350                 Also, this object provides an interface identical to the one defined by the
1351                 add_layer object except that we add the num_repetitions() and
1352                 get_repeated_layer() methods.  These additions are shown below along with
1353                 some additional explanatory comments.
1354         !*/
1355 
1356     public:
1357 
1358         typedef SUBNET subnet_type;
1359         typedef typename SUBNET::input_type input_type;
1360         const static size_t num_computational_layers = (REPEATED_LAYER<SUBNET>::num_computational_layers-SUBNET::num_computational_layers)*num + SUBNET::num_computational_layers;
1361         const static size_t num_layers = (REPEATED_LAYER<SUBNET>::num_layers-SUBNET::num_layers)*num + SUBNET::num_layers;
1362         typedef REPEATED_LAYER<an_unspecified_input_type> repeated_layer_type;
1363 
1364         template <typename T, typename ...U>
1365         repeat(
1366             T arg1,
1367             U ...args2
1368         );
1369         /*!
1370             ensures
1371                 - arg1 is used to initialize the num_repetitions() copies of REPEATED_LAYER inside
1372                   this object.  That is, all the REPEATED_LAYER elements are initialized identically
1373                   by being given copies of arg1.
1374                 - The rest of the arguments to the constructor, i.e. args2, are passed to
1375                   SUBNET's constructor.
1376         !*/
1377 
1378         template <typename ...T, typename ...U>
1379         repeat(
1380             decorator_repeat_group<T...>&& arg1,
1381             U ...args2
1382         );
1383         /*!
1384             ensures
1385                 - arg1 is used to initialize the num_repetitions() copies of REPEATED_LAYER inside
1386                   this object.  That is, all the REPEATED_LAYER elements are initialized identically
1387                   by being given copies of an undecorated arg1.
1388                 - The rest of the arguments to the constructor, i.e. args2, are passed to
1389                   SUBNET's constructor.
1390         !*/
1391 
1392         size_t num_repetitions (
1393         ) const;
1394         /*!
1395             ensures
1396                 - returns num (i.e. the number of times REPEATED_LAYER was stacked on top of SUBNET)
1397         !*/
1398 
1399         const repeated_layer_type& get_repeated_layer (
1400             size_t i
1401         ) const;
1402         /*!
1403             requires
1404                 - i < num_repetitions()
1405             ensures
1406                 - returns a reference to the i-th instance of REPEATED_LAYER.  For example,
1407                   get_repeated_layer(0) returns the instance of REPEATED_LAYER that is on the top of
1408                   the network while get_repeated_layer(num_repetitions()-1) returns the
1409                   instance of REPEATED_LAYER that is stacked immediately on top of SUBNET.
1410         !*/
1411 
1412         repeated_layer_type& get_repeated_layer (
1413             size_t i
1414         );
1415         /*!
1416             requires
1417                 - i < num_repetitions()
1418             ensures
1419                 - returns a reference to the i-th instance of REPEATED_LAYER.  For example,
1420                   get_repeated_layer(0) returns the instance of REPEATED_LAYER that is on the top of
1421                   the network while get_repeated_layer(num_repetitions()-1) returns the
1422                   instance of REPEATED_LAYER that is stacked immediately on top of SUBNET.
1423         !*/
1424 
1425         const subnet_type& subnet(
1426         ) const;
1427         /*!
1428             ensures
1429                 - returns the SUBNET base network that repeat sits on top of.  If you want
1430                   to access the REPEATED_LAYER components then you must use get_repeated_layer().
1431         !*/
1432 
1433         subnet_type& subnet(
1434         );
1435         /*!
1436             ensures
1437                 - returns the SUBNET base network that repeat sits on top of.  If you want
1438                   to access the REPEATED_LAYER components then you must use get_repeated_layer().
1439         !*/
1440     };
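    /*
        For illustration only: a sketch of how repeat might be used.  The helper template
        "block" and the concrete layers (relu, fc, input, matrix) are placeholders for
        layers defined elsewhere in dlib.

            template <typename SUBNET>
            using block = relu<fc<42, SUBNET>>;

            // Equivalent to block<block<block<input<matrix<float>>>>>
            using net_type = repeat<3, block, input<matrix<float>>>;

            net_type net;
            net.num_repetitions();       // == 3
            net.get_repeated_layer(0);   // the topmost copy of block
            net.subnet();                // the input layer the repeated blocks sit on top of

        Constructor arguments intended for the repeated copies of block can be grouped
        with repeat_group(); the grouped arguments initialize every copy identically and
        the remaining constructor arguments are forwarded to the input layer.
    */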
1441 
1442     template < size_t num, template<typename> class T, typename U >
1443     std::ostream& operator<<(std::ostream& out, const repeat<num,T,U>& item);
1444     /*!
1445         prints the network architecture to the given output stream.
1446     !*/
1447 
1448     template < size_t num, template<typename> class T, typename U >
1449     void serialize(const repeat<num,T,U>& item, std::ostream& out);
1450     template < size_t num, template<typename> class T, typename U >
1451     void deserialize(repeat<num,T,U>& item, std::istream& in);
1452     /*!
1453         provides serialization support
1454     !*/
1455 
1456 // ----------------------------------------------------------------------------------------
1457 
1458     template <
1459         unsigned long ID,
1460         typename SUBNET
1461         >
1462     class add_tag_layer
1463     {
1464         /*!
1465             REQUIREMENTS ON SUBNET
1466                 - One of the following must be true:
1467                     - SUBNET implements the EXAMPLE_INPUT_LAYER interface defined in
1468                       input_abstract.h.
1469                     - SUBNET is an add_layer object.
1470                     - SUBNET is an add_tag_layer object.
1471                     - SUBNET is an add_skip_layer object.
1472                     - SUBNET is a repeat object.
1473 
1474             WHAT THIS OBJECT REPRESENTS
1475                 This object adds a new layer to a deep neural network.  However, this layer
1476                 simply performs the identity transform.  This means it is a no-op and its
1477                 presence does not change the behavior of the network.  It exists solely to
1478                 be used by add_skip_layer to reference a particular part of a network.
1479 
1480                 Also, this object provides an interface identical to the one defined by the
1481                 add_layer object.
1482         !*/
1483     };
1484 
1485     template <unsigned long ID, typename U>
1486     std::ostream& operator<<(std::ostream& out, const add_tag_layer<ID,U>& item);
1487     /*!
1488         prints the network architecture to the given output stream.
1489     !*/
1490 
1491     template <unsigned long ID, typename U>
1492     void serialize(const add_tag_layer<ID,U>& item, std::ostream& out);
1493     template <unsigned long ID, typename U>
1494     void deserialize(add_tag_layer<ID,U>& item, std::istream& in);
1495     /*!
1496         provides serialization support
1497     !*/
1498 
1499     template <typename SUBNET> using tag1  = add_tag_layer< 1, SUBNET>;
1500     template <typename SUBNET> using tag2  = add_tag_layer< 2, SUBNET>;
1501     template <typename SUBNET> using tag3  = add_tag_layer< 3, SUBNET>;
1502     template <typename SUBNET> using tag4  = add_tag_layer< 4, SUBNET>;
1503     template <typename SUBNET> using tag5  = add_tag_layer< 5, SUBNET>;
1504     template <typename SUBNET> using tag6  = add_tag_layer< 6, SUBNET>;
1505     template <typename SUBNET> using tag7  = add_tag_layer< 7, SUBNET>;
1506     template <typename SUBNET> using tag8  = add_tag_layer< 8, SUBNET>;
1507     template <typename SUBNET> using tag9  = add_tag_layer< 9, SUBNET>;
1508     template <typename SUBNET> using tag10 = add_tag_layer<10, SUBNET>;
1509 
1510     template <template<typename SUBNET> class tag>
1511     struct tag_id
1512     {
1513         /*!
1514             REQUIREMENTS ON tag
1515                 Tag should be an add_tag_layer template such as tag1, tag2, etc.
1516 
1517             WHAT THIS OBJECT REPRESENTS
1518                 This is a tool for finding the numeric ID of a tag layer.  For example,
1519                 tag_id<tag3>::id == 3.
1520         !*/
1521 
1522         const static unsigned long id;
1523     };
1524 
1525 // ----------------------------------------------------------------------------------------
1526 
1527     template <
1528         template<typename> class TAG_TYPE,
1529         typename SUBNET
1530         >
1531     class add_skip_layer
1532     {
1533         /*!
1534             REQUIREMENTS ON SUBNET
1535                 - One of the following must be true:
1536                     - SUBNET is an add_layer object.
1537                     - SUBNET is an add_tag_layer object.
1538                     - SUBNET is an add_skip_layer object.
1539                     - SUBNET is a repeat object.
1540 
1541             WHAT THIS OBJECT REPRESENTS
1542                 This object adds a new layer to a deep neural network which draws its
1543                 inputs from layer<TAG_TYPE>(subnet()) and performs the identity transform.
1544 
1545                 Also, this object provides an interface identical to the one defined by the
1546                 add_layer object.
1547         !*/
1548     };
1549 
1550     template <template<typename> class T, typename U>
1551     std::ostream& operator<<(std::ostream& out, const add_skip_layer<T,U>& item);
1552     /*!
1553         prints the network architecture to the given output stream.
1554     !*/
1555 
1556     template <template<typename> class T, typename U>
1557     void serialize(const add_skip_layer<T,U>& item, std::ostream& out);
1558     template <template<typename> class T, typename U>
1559     void deserialize(add_skip_layer<T,U>& item, std::istream& in);
1560     /*!
1561         provides serialization support
1562     !*/
1563 
1564     template <typename SUBNET> using skip1  = add_skip_layer< tag1, SUBNET>;
1565     template <typename SUBNET> using skip2  = add_skip_layer< tag2, SUBNET>;
1566     template <typename SUBNET> using skip3  = add_skip_layer< tag3, SUBNET>;
1567     template <typename SUBNET> using skip4  = add_skip_layer< tag4, SUBNET>;
1568     template <typename SUBNET> using skip5  = add_skip_layer< tag5, SUBNET>;
1569     template <typename SUBNET> using skip6  = add_skip_layer< tag6, SUBNET>;
1570     template <typename SUBNET> using skip7  = add_skip_layer< tag7, SUBNET>;
1571     template <typename SUBNET> using skip8  = add_skip_layer< tag8, SUBNET>;
1572     template <typename SUBNET> using skip9  = add_skip_layer< tag9, SUBNET>;
1573     template <typename SUBNET> using skip10 = add_skip_layer<tag10, SUBNET>;
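    /*
        For illustration only: a sketch showing how tag and skip layers route data.  The
        concrete layers (fc, relu, input, matrix) are placeholders for layers defined in
        layers_abstract.h and input_abstract.h.

            // tag1 marks the output of the inner relu.  skip1 ignores the output of its
            // own subnetwork and instead forwards the tensor recorded at tag1, so the
            // fc<20>/relu branch in the middle is bypassed here.
            using net_type = fc<10,
                             skip1<
                             relu<fc<20,
                             tag1<relu<fc<30,
                             input<matrix<float>>
                             >>>>>>>;

            net_type net;
            layer<tag1>(net);    // direct access to the tagged sub network (see layer() below)
            tag_id<tag1>::id;    // == 1
    */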
1574 
1575 // ----------------------------------------------------------------------------------------
1576 
1577     template <
1578         unsigned int i,
1579         typename net_type
1580         >
1581     auto& layer (
1582         net_type& n
1583     );
1584     /*!
1585         requires
1586             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1587               add_tag_layer.
1588             - i < net_type::num_layers
1589         ensures
1590             - This function allows you to access any layer in a network by its layer index
1591               i.  That is, it walks i steps down the network and returns the layer object
1592               found there.  Since networks can be big, the easiest way to find layer index
1593               numbers is to print the network to the screen, since the printout includes
1594               the index of each layer.
1595             - In general, this function chains together i calls to n.subnet() and returns
1596               the result.  So for example:
1597                 - if (i == 0)
1598                     - returns n
1599                 - else if (i == 1)
1600                     - returns n.subnet()
1601                 - else if (i == 2)
1602                     - returns n.subnet().subnet()
1603                 - else if (i == 3)
1604                     - returns n.subnet().subnet().subnet()
1605                 - else
1606                     - etc.
1607               Except that when it hits a repeat layer it recurses into the repeated layers
1608               contained inside.  That is, if the layer index indicates a layer in a repeat
1609               object this function will make the appropriate call to get_repeated_layer()
1610               and do the right thing.
1611     !*/
1612 
1613     template <
1614         template<typename> class Match,
1615         typename net_type
1616         >
1617     auto& layer (
1618         net_type& n
1619     );
1620     /*!
1621         requires
1622             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1623               add_tag_layer.
1624         ensures
1625             - returns the first layer in n that is of type Match.  E.g. if net_type is
1626               fc<relu<fc<input<sample_type>>>> then calling layer<relu>(n) would return
1627               layer<1>(n), that is, a reference to the relu layer.
1628     !*/
1629 
1630     template <
1631         template<typename> class Match,
1632         unsigned int i,
1633         typename net_type
1634         >
1635     auto& layer (
1636         net_type& n
1637     );
1638     /*!
1639         requires
1640             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1641               add_tag_layer.
1642         ensures
1643             - returns layer<i>(layer<Match>(n))
1644     !*/
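    /*
        For illustration only, assuming a small hypothetical network (loss_multiclass_log,
        fc, relu, input, and matrix are defined elsewhere in dlib):

            using net_type = loss_multiclass_log<fc<10, relu<fc<50, input<matrix<float>>>>>>;
            net_type net;

            layer<0>(net);       // the whole network, i.e. net itself
            layer<1>(net);       // the fc<10> layer
            layer<relu>(net);    // the first relu layer, here the same as layer<2>(net)
            layer<relu,1>(net);  // one step below the relu, i.e. the fc<50> layer
    */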
1645 
1646 // ----------------------------------------------------------------------------------------
1647 
1648     template <typename net_type>
1649     auto& input_layer (
1650         net_type& net
1651     );
1652     /*!
1653         requires
1654             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1655               add_tag_layer.
1656         ensures
1657             - returns the input layer of the given network object.  Specifically, this
1658               function is equivalent to calling:
1659                 layer<net_type::num_layers-1>(net);
1660               That is, you get the input layer details object for the network.
1661     !*/
1662 
1663 // ----------------------------------------------------------------------------------------
1664 
1665     template <
1666         typename net_type,
1667         typename visitor
1668         >
1669     void visit_layer_parameters(
1670         net_type& net,
1671         visitor v
1672     );
1673     /*!
1674         requires
1675             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1676               add_tag_layer.
1677             - v is a function object with a signature equivalent to:
1678                 v(size_t idx, tensor& t)
1679               or:
1680                 v(tensor& t)
1681         ensures
1682             - Loops over all the computational layers (i.e. layers with parameters, as
1683               opposed to loss, tag, or input layers) in net and passes their parameters to
1684               v().  To be specific, this function essentially performs the following:
1685 
1686                 size_t computational_layer_idx = 0;
1687                 for (size_t i = 0; i < net_type::num_layers; ++i)
1688                 {
1689                     if (layer<i>(net) is a computational layer)
1690                     {
1691                         v(computational_layer_idx, layer<i>(net).layer_details().get_layer_params());
1692                         ++computational_layer_idx;
1693                     }
1694                 }
1695             - When v() is called, the first argument is always < net_type::num_computational_layers.
1696     !*/
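    /*
        For illustration only: counting the total number of parameters in a network,
        where net is a hypothetical network object.

            size_t num_params = 0;
            visit_layer_parameters(net, [&](tensor& p) { num_params += p.size(); });
    */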
1697 
1698 // ----------------------------------------------------------------------------------------
1699 
1700     template <
1701         typename net_type,
1702         typename visitor
1703         >
1704     void visit_layer_parameter_gradients(
1705         net_type& net,
1706         visitor v
1707     );
1708     /*!
1709         requires
1710             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1711               add_tag_layer.
1712             - v is a function object with a signature equivalent to:
1713                 v(size_t idx, tensor& t)
1714               or:
1715                 v(tensor& t)
1716         ensures
1717             - Loops over all the computational layers (i.e. layers with parameters, as
1718               opposed to loss, tag, or input layers) in net and passes their parameter
1719               gradients to v().  To be specific, this function essentially performs the
1720               following:
1721 
1722                 size_t computational_layer_idx = 0;
1723                 for (size_t i = 0; i < net_type::num_layers; ++i)
1724                 {
1725                     if (layer<i>(net) is a computational layer)
1726                     {
1727                         v(computational_layer_idx, layer<i>(net).get_parameter_gradient());
1728                         ++computational_layer_idx;
1729                     }
1730                 }
1731             - When v() is called, the first argument is always < net_type::num_computational_layers.
1732     !*/
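    /*
        For illustration only: accumulating the absolute values of all parameter gradients
        after a call to compute_parameter_gradients(), where net is a hypothetical network
        object.

            double grad_l1_norm = 0;
            visit_layer_parameter_gradients(net, [&](size_t, tensor& g) {
                for (float v : g)
                    grad_l1_norm += std::abs(v);
            });
    */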
1733 
1734 // ----------------------------------------------------------------------------------------
1735 
1736     template <
1737         typename net_type,
1738         typename visitor
1739         >
1740     void visit_layers(
1741         net_type& net,
1742         visitor v
1743     );
1744     /*!
1745         requires
1746             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1747               add_tag_layer.
1748             - v is a function object with a signature equivalent to:
1749                 v(size_t idx, any_net_type& t)
1750               or:
1751                 v(any_net_type& t)
1752               That is, it takes an optional size_t and then any of the network types such as
1753               add_layer, add_loss_layer, etc.
1754         ensures
1755             - Loops over all the layers in net and calls v() on them.  To be specific, this
1756               function essentially performs the following:
1757 
1758                 for (size_t i = 0; i < net_type::num_layers; ++i)
1759                     v(i, layer<i>(net));
1760     !*/
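    /*
        For illustration only: since every layer in the chain has a distinct C++ type,
        the visitor is typically a generic lambda, e.g.:

            visit_layers(net, [](size_t idx, auto& l) {
                std::cout << "visiting layer " << idx << std::endl;
            });
    */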
1761 
1762     template <
1763         typename net_type,
1764         typename visitor
1765         >
1766     void visit_layers_backwards(
1767         net_type& net,
1768         visitor v
1769     );
1770     /*!
1771         requires
1772             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1773               add_tag_layer.
1774             - v is a function object with a signature equivalent to:
1775                 v(size_t idx, any_net_type& t)
1776               or:
1777                 v(any_net_type& t)
1778               That is, it takes an optional size_t and then any of the network types such as
1779               add_layer, add_loss_layer, etc.
1780         ensures
1781             - Loops over all the layers in net and calls v() on them.  The loop happens in
1782               the reverse order of visit_layers().  To be specific, this function
1783               essentially performs the following:
1784 
1785                 for (size_t i = net_type::num_layers; i != 0; --i)
1786                     v(i-1, layer<i-1>(net));
1787     !*/
1788 
1789 // ----------------------------------------------------------------------------------------
1790 
1791     template <
1792         typename net_type,
1793         typename visitor
1794         >
1795     void visit_computational_layers(
1796         net_type& net,
1797         visitor v
1798     );
1799     /*!
1800         requires
1801             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1802               add_tag_layer.
1803             - v is a function object with a signature equivalent to:
1804                 v(size_t idx, any_computational_layer& t)
1805               or:
1806                 v(any_computational_layer& t)
1807               That is, it takes an optional size_t and then any of the computational layers.  E.g.
1808               one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or con_.
1809         ensures
1810             - Loops over all the computational layers in net and calls v() on them.  To be specific, this
1811               function essentially performs the following:
1812 
1813                 for (size_t i = 0; i < net_type::num_layers; ++i)
1814                     if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
1815                         v(i, layer<i>(net).layer_details());
1816     !*/
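    /*
        For illustration only: "freezing" every computational layer in a hypothetical
        network net by setting its learning rate multiplier to zero (see
        set_learning_rate_multiplier() above).

            visit_computational_layers(net, [](auto& l) { set_learning_rate_multiplier(l, 0); });
    */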
1817 
1818     template <
1819         size_t begin,
1820         size_t end,
1821         typename net_type,
1822         typename visitor
1823         >
1824     void visit_computational_layers_range(
1825         net_type& net,
1826         visitor v
1827     );
1828     /*!
1829         requires
1830             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1831               add_tag_layer.
1832             - v is a function object with a signature equivalent to:
1833                 v(size_t idx, any_computational_layer& t)
1834               or:
1835                 v(any_computational_layer& t)
1836               That is, it takes an optional size_t and then any of the computational layers.  E.g.
1837               one of the layer types defined in dlib/dnn/layers_abstract.h like fc_ or con_.
1838         ensures
1839             - Loops over all the computational layers in the range [begin,end) in net and calls v()
1840               on them.  To be specific, this function essentially performs the following:
1841 
1842                 for (size_t i = begin; i < end; ++i)
1843                     if (layer<i>(net) is an add_layer type, i.e. it adds a computational layer)
1844                         v(i, layer<i>(net).layer_details());
1845     !*/
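    /*
        For illustration only: the same idea restricted to the computational layers found
        among layers [2,5) of a hypothetical network net, e.g. to freeze only part of it.

            visit_computational_layers_range<2,5>(net, [](auto& l) { set_learning_rate_multiplier(l, 0); });
    */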
1846 
1847 // ----------------------------------------------------------------------------------------
1848 
1849     template <
1850         size_t begin,
1851         size_t end,
1852         typename net_type,
1853         typename visitor
1854         >
1855     void visit_layers_range(
1856         net_type& net,
1857         visitor v
1858     );
1859     /*!
1860         requires
1861             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1862               add_tag_layer.
1863             - v is a function object with a signature equivalent to:
1864                 v(size_t idx, any_net_type& t)
1865               or:
1866                 v(any_net_type& t)
1867               That is, it takes an optional size_t and then any of the network types such as
1868               add_layer, add_loss_layer, etc.
1869             - begin <= end <= net_type::num_layers
1870         ensures
1871             - Loops over the layers in the range [begin,end) in net and calls v() on them.
1872               To be specific, this function essentially performs the following:
1873 
1874                 for (size_t i = begin; i < end; ++i)
1875                     v(i, layer<i>(net));
1876     !*/
1877 
1878     template <
1879         size_t begin,
1880         size_t end,
1881         typename net_type,
1882         typename visitor
1883         >
1884     void visit_layers_backwards_range(
1885         net_type& net,
1886         visitor v
1887     );
1888     /*!
1889         requires
1890             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1891               add_tag_layer.
1892             - v is a function object with a signature equivalent to:
1893                 v(size_t idx, any_net_type& t)
1894               or:
1895                 v(any_net_type& t)
1896               That is, it takes an optional size_t and then any of the network types such as
1897               add_layer, add_loss_layer, etc.
1898             - begin <= end <= net_type::num_layers
1899         ensures
1900             - Loops over the layers in the range [begin,end) in net and calls v() on them.
1901               The loop happens in the reverse order of visit_layers_range().  To be specific,
1902               this function essentially performs the following:
1903 
1904                 for (size_t i = end; i != begin; --i)
1905                     v(i-1, layer<i-1>(net));
1906     !*/
1907 
1908 // ----------------------------------------------------------------------------------------
1909 
1910     template <
1911         unsigned long tag_id,
1912         typename net_type,
1913         typename visitor
1914         >
1915     void visit_layers_until_tag(
1916         net_type& net,
1917         visitor v
1918     );
1919     /*!
1920         requires
1921             - net_type is an object of type add_layer, add_loss_layer, add_skip_layer, or
1922               add_tag_layer.
1923             - v is a function object with a signature equivalent to:
1924                 v(any_net_type& t)
1925               That is, it must take any of the network types such as add_layer,
1926               add_loss_layer, etc.
1927         ensures
1928             - Loops over all the layers in net beginning with layer<0>(net) and going until
1929               a tag layer with an ID of tag_id is encountered.  To be specific, this
1930               function essentially performs the following:
1931 
1932                 size_t i = 0;
1933                 while(layer<i>(net) isn't an add_tag_layer with ID == tag_id) {
1934                     v(layer<i>(net));
1935                     ++i;
1936                 }
1937                 v(layer<i>(net));  // also visits the tag layer itself at the very end.
1938     !*/
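    /*
        For illustration only: counting how many layers sit above (and including) the
        tag3 layer of a hypothetical network net that contains such a tag.

            size_t count = 0;
            visit_layers_until_tag<3>(net, [&](auto&) { ++count; });
            // count now includes the tag3 layer itself, per the loop shown above.
    */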
1939 
1940 // ----------------------------------------------------------------------------------------
1941 
1942     struct layer_test_results
1943     {
1944         std::string log;
1945         bool was_good;
1946 
1947         operator bool() const { return was_good; }
1948     };
1949 
1950     inline std::ostream& operator<< (std::ostream& out, const layer_test_results& item)
1951     {
1952         out << item.log;
1953         return out;
1954     }
1955 
1956     template <
1957         typename layer_details_type
1958         >
1959     layer_test_results test_layer (
1960         layer_details_type l
1961     );
1962     /*!
1963         ensures
1964             - Checks if l correctly implements the EXAMPLE_COMPUTATIONAL_LAYER_ interface
1965               defined in layers_abstract.h.  Importantly, it computes numerical approximations
1966               to the gradients and compares them to the gradients computed by the layer itself.
1967             - The results of the testing are returned.  In particular, if the returned object
1968               is RESULT then we will have:
1969                 - RESULT.was_good == false if and only if the layer failed the testing.
1970                 - RESULT.log == a string describing why the testing failed if was_good==false.
1971             - Note that this function is only capable of checking layers that take
1972               arbitrary subnetworks as input.  So if you have designed a layer that expects
1973               only a certain restricted type of subnetwork then you might get a compile or
1974               runtime error when you call this function.
1975     !*/
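    /*
        For illustration only: how a newly written layer might be checked.  my_layer_ is
        a hypothetical type implementing the EXAMPLE_COMPUTATIONAL_LAYER_ interface.

            my_layer_ l;
            layer_test_results res = test_layer(l);
            if (!res)
                std::cout << res << std::endl;   // res.log explains what failed
    */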
1976 
1977 // ----------------------------------------------------------------------------------------
1978 
1979 }
1980 
1981 #endif // DLIB_DNn_CORE_ABSTRACT_H_
1982 
1983