1 // This file is part of OpenCV project.
2 // It is subject to the license terms in the LICENSE file found in the top-level directory
3 // of this distribution and at http://opencv.org/license.html.
4 
5 #ifndef OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP
6 #define OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP
7 
8 #include "nvcc_defs.hpp"
9 #include "memory.hpp"
10 #include "cublas.hpp"
11 #include "cudnn.hpp"
12 #include "span.hpp"
13 
14 #include "../cxx_utils/resizable_static_array.hpp"
15 #include "../cxx_utils/is_iterator.hpp"
16 
17 #include <opencv2/core.hpp>
18 
19 #include <cstddef>
20 #include <cstdint>
21 #include <type_traits>
22 #include <array>
23 #include <functional>
24 #include <algorithm>
25 #include <numeric>
26 #include <iterator>
27 #include <vector>
28 #include <utility>
29 
30 #ifndef CSL_MAX_TENSOR_RANK
31     #define CSL_MAX_TENSOR_RANK 6
32 #endif
33 
34 namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {
35 
36     /** \file tensor.hpp
37      *
38      *     TYPE     | OWNERSHIP | MUTABLE
39      * ------------ + --------- + --------
40      *    Tensor    |    Yes    |   Yes
41      *  TensorSpan  |    No     |   Yes
42      *  TensorView  |    No     |   No
43      *
44      * Tensor is implicitly convertible to TensorSpan and TensorView
45      * TensorSpan is implicitly convertible to TensorView
46      *
47      * Concepts and template parameter naming convention:
48      * - "MutableTensorType" can refer to a Tensor or TensorSpan
49      * - "ImmutableTensorType" can refer to a Tensor, TensorSpan or TensorView
50      * - "TensorType" can refer to a Tensor, TensorSpan or TensorView
51      *
52      * "ImmutableTensorType" is used when the tensor data might be used.
53      * "TensorType" is used when only meta-information such as the size or shape is required, i.e. the data won't be touched
54      */
55 
/** @brief maps a possibly negative axis index to the equivalent non-negative index
 *
 * A negative \p axis selects an axis counted from the end and is translated to `axis + rank`;
 * a non-negative \p axis is returned unchanged. No range check is performed: the caller must
 * verify that the result is less than \p rank.
 */
CUDA4DNN_HOST_DEVICE constexpr std::size_t clamp_axis(int axis, std::size_t rank) {
    /* the unsigned addition wraps around, which yields `rank - |axis|` for negative indices */
    return (axis >= 0) ? static_cast<std::size_t>(axis) : static_cast<std::size_t>(axis) + rank;
}
60 
61     /** @brief multi-dimensional contiguous non-copyable GPU tensor
62      *
63      * \tparam  T       type of data stored
64      *
65      * @note scalars or zero rank tensors are not supported
66      * @note the maximum rank supported is controlled by the `CSL_MAX_TENSOR_RANK` preprocessor symbol
67      */
68     template <class T>
69     class Tensor {
70         static_assert(std::is_standard_layout<T>::value, "T must satisfy StandardLayoutType");
71 
72     public:
73         using value_type    = typename ManagedPtr<T>::element_type;
74         using pointer       = typename ManagedPtr<value_type>::pointer;
75         using const_pointer = typename ManagedPtr<value_type>::const_pointer;
76         using size_type     = typename ManagedPtr<value_type>::size_type;
77 
Tensor()78         Tensor() noexcept { }
79         Tensor(const Tensor&) = delete;
Tensor(Tensor && other)80         Tensor(Tensor&& other) noexcept {
81             data = std::move(other.data);
82             shape = other.shape;
83             other.shape.clear();
84         }
85 
86         /** @brief constructs a tensor of a specific shape
87          *
88          * Whatever arguments are accepted by the resize methods are accepted here.
89          */
90         template <class ...Args>
Tensor(Args &&...sizes)91         Tensor(Args&&... sizes) { resize(std::forward<Args>(sizes)...); }
92 
93         Tensor& operator=(const Tensor&) = delete;
operator =(Tensor && other)94         Tensor& operator=(Tensor&& other) noexcept {
95             data = std::move(other.data);
96             shape = other.shape;
97             other.shape.clear();
98             return *this;
99         }
100 
101         /** returns true if the tensor is empty (or uninitialized) */
empty() const102         bool empty() const noexcept { return shape.size() == 0; }
103 
104         /** returns the total number of elements in the tensor
105          *
106          * Pre-conditions:
107          * - tensor must be non-empty
108          */
size() const109         size_type size() const noexcept {
110             CV_Assert(!empty());
111             return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
112         }
113 
114         /** returns the rank of the tensor
115          *
116          * Pre-conditions:
117          * - tensor must be non-empty
118          */
rank() const119         size_type rank() const noexcept {
120             CV_Assert(!empty());
121             return shape.size();
122         }
123 
124         /** @brief returns the length of the axis
125          *
126          * Every axis is assigned a zero-based index which can be used to select an axis.
127          * Negative index can be used to select an axis from the end.
128          *
129          * Examples:
130          * > -1 represents the last axis
131          * > 0 represents the first axis
132          * > 1 represents the second axis
133          *
134          * Pre-conditions:
135          * - tensor must be non-empty
136          * - the axis must be in the range [-rank(), rank())
137          */
get_axis_size(int axis) const138         size_type get_axis_size(int axis) const noexcept {
139             axis = clamp_axis(axis, rank());
140             CV_Assert(axis >= 0 && axis < rank());
141             return shape[axis];
142         }
143 
144         /** @brief returns the combined size of the axes in an axis range
145          *
146          * if the shape is [3 x 5 x 7 x 11]
147          * - `size_range(0, 2)` will return 3 x 5 = 15
148          * - `size_range(1, 3)` will return 5 x 7 = 35
149          * - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
150          *
151          * Pre-conditions:
152          * - tensor must be non-empty
153          * - `axis_start` must be less than or equal to `axis_end`
154          * - `axis_end` must be less than or equal to the rank
155          *
156          * returns one if the two `axis_start` and `axis_end` are equal
157          */
size_range(size_type axis_start,size_type axis_end) const158         size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
159             CV_Assert(!empty());
160             CV_Assert(axis_start <= axis_end);
161             CV_Assert(axis_end <= rank());
162             auto start = std::begin(shape) + axis_start;
163             auto end = std::begin(shape) + axis_end;
164             return std::accumulate(start, end, 1, std::multiplies<size_type>());
165         }
166 
167         /** returns an std::vector containing axis lengths starting from axis zero
168          *
169          * Pre-conditions:
170          * - tensor must be non-empty
171          *
172          * Exception Guarantee: Strong
173          */
shape_as_vector() const174         std::vector<size_type> shape_as_vector() const {
175             CV_Assert(!empty());
176             return std::vector<size_type>(std::begin(shape), std::end(shape));
177         }
178 
179         /** returns a pointer to mutable device memory owned by the tensor */
get()180         pointer get() noexcept { return data.get(); }
181 
182         /** returns a pointer to immutable device memory owned by the tensor */
get() const183         const_pointer get() const noexcept { return data.get(); }
184 
185         /** @brief releases the memory owned by the tensor
186          *
187          * Pre-conditions:
188          * - tensor must be non-empty
189          *
190          * Exception Guarantee: Strong
191          */
clear()192         void clear() {
193             CV_Assert(!empty());
194             data.reset();
195             shape.clear();
196         }
197 
198         /** @brief resizes the tensor
199          *
200          * Pre-conditions:
201          * - [start, end) represents a forward range containing the length of the axes in order starting from axis zero
202          * - number of lengths provided must not exceed the maximum tensor rank (CSL_MAX_TENSOR_RANK)
203          * - the sizes must be positive integers
204          *
205          * Exception Guarantee: Strong
206          */
207         template <class ForwardItr>
208         typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
resize(ForwardItr start,ForwardItr end)209         ::type resize(ForwardItr start, ForwardItr end) {
210             CV_Assert(start != end);
211             CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);
212 
213             using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
214             auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
215             data.reset(total);
216 
217             shape.assign(start, end);
218         }
219 
220         /** @brief resizes the tensor
221          * constructs a range out of the arguments and invokes the range-based resize method
222          */
223         template <class ...Sizes>
resize(Sizes...new_sizes_)224         void resize(Sizes... new_sizes_) {
225             static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
226             static_assert(sizeof...(Sizes) > 0, "no sizes provided");
227             std::array<size_type, sizeof...(Sizes)> new_sizes = { static_cast<size_type>(new_sizes_)... };
228             resize(std::begin(new_sizes), std::end(new_sizes));
229         }
230 
231         /** @brief resizes the tensor
232          *
233          * Pre-conditions:
234          * - the reference tensor must be non-empty
235          *
236          * Exception Guarantee: Strong
237          */
238         template <class TensorType>
resize_as(const TensorType & tensor)239         void resize_as(const TensorType& tensor) {
240             CV_Assert(!tensor.empty());
241             cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
242             for (int i = 0; i < new_sizes.size(); i++)
243                 new_sizes[i] = tensor.get_axis_size(i);
244             resize(std::begin(new_sizes), std::end(new_sizes));
245         }
246 
247         /** @brief reshapes the tensor
248          *
249          * Length deduction:
250          * The length of at most one axis can be deduced using the total size constraint. The axis can
251          * be marked for deduction by specifying the size as -1.
252          *
253          * The axes for which no size was provided (excluding -1) will be assumed to be one.
254          *
255          * Pre-conditions:
256          * - the tensor must be non-empty
257          * - [start, end) represents a forward range containing the length of the axes starting from axis zero
258          * - the number of lengths provided must be less than or equal to the tensor rank
259          * - at most one axis length is allowed for length deduction
260          * - the lengths provided must ensure that the total number of elements remains unchanged
261          *
262          * Exception Guarantee: Strong
263          */
264         template <class ForwardItr>
265         typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
reshape(ForwardItr start,ForwardItr end)266         ::type reshape(ForwardItr start, ForwardItr end) {
267             CV_Assert(start != end);
268             CV_Assert(std::distance(start, end) <= rank());
269 
270             using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
271 
272             /* the user may leave at most one axis size for deduction by specifying -1 */
273             auto sizes_to_deduce = std::count(start, end, -1);
274             if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }
275 
276             /* sizes must be positive numbers with the exception of -1 */
277             auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
278                 return !(x > 0 || x == -1);
279             });
280             if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
281 
282             /* compute the total number of elements in the new tensor */
283             size_type unknown_size = 0;
284             auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
285             if (total < 0) {
286                 /* there is an unknown size */
287                 if (std::abs(total) <= size()) {
288                     unknown_size = size() / std::abs(total);
289                     total = size();
290                 }
291                 /* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
292                 ** Since `total` is negative, the size check which follows will fail and throw an error
293                 */
294             }
295 
296             /* the number of elements before and after reshape must be exactly same */
297             if (total != size()) {
298                 CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
299             }
300 
301             /* we assume the size of the unspecified axes to be one */
302             std::fill(std::begin(shape), std::end(shape), 1);
303             std::copy_backward(start, end, std::end(shape));
304 
305             /* replace the unknown axis with the correct value */
306             std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
307         }
308 
309         /** @brief reshapes the tensor
310          * constructs a range out of the arguments and invokes range-based reshape method
311          */
312         template <class ...Sizes>
reshape(Sizes...new_sizes_)313         void reshape(Sizes... new_sizes_) {
314             static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
315             static_assert(sizeof...(Sizes) > 0, "no sizes provided");
316             std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
317             reshape(std::begin(new_sizes), std::end(new_sizes));
318         }
319 
320         /** @brief reshapes the tensor
321          *
322          * Pre-conditions:
323          * - the reference tensor must be a non-empty tensor
324          * - the reference tensor's rank must be lesser than or equal to the rank of target tensor
325          *
326          * Exception Guarantee: Strong
327          */
328         template <class TensorType>
reshape_as(const TensorType & tensor)329         void reshape_as(const TensorType& tensor) {
330             CV_Assert(!tensor.empty());
331             cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
332             for (int i = 0; i < new_sizes.size(); i++)
333                 new_sizes[i] = tensor.get_axis_size(i);
334             reshape(std::begin(new_sizes), std::end(new_sizes));
335         }
336 
337         /** @brief squeezes the tensor
338          *
339          * removes all axes of unit size
340          *
341          * Pre-conditions:
342          * - the tensor must be non-empty
343          * - the tensor's rank must be at least two
344          *
345          * Exception Guarantee: Strong
346          */
squeeze()347         void squeeze() {
348             CV_Assert(!empty());
349             CV_Assert(rank() >= 2);
350             auto itr = std::remove(std::begin(shape), std::end(shape), 1);
351             shape.resize(itr - std::begin(shape));
352         }
353 
354         /** @brief squeezes the tensor
355          *
356          * removes the specified axis if the axis length is one; otherwise, ignores the request
357          *
358          * Pre-conditions:
359          * - the tensor must be non-empty
360          * - the tensor's rank must be at least two
361          *
362          * Exception Guarantee: Strong
363          */
squeeze(int axis)364         void squeeze(int axis) {
365             CV_Assert(!empty());
366             CV_Assert(rank() >= 2);
367             axis = clamp_axis(axis, rank());
368             CV_Assert(axis >= 0 && axis < rank());
369             shape.erase(std::begin(shape) + axis);
370         }
371 
372         /** @brief squeezes the tensor
373          *
374          * removes leading singleton axes until the tensor's rank is equal to the requested rank
375          *
376          * Pre-conditions:
377          * - the tensor must be non-empty
378          * - the tensor's rank must be at least two
379          * - the tensor's rank must be at least the requested rank
380          * - the tensor must be squeezable up to the requested rank
381          *
382          * Exception Guarantee: Strong
383          */
squeeze_to(int r)384         void squeeze_to(int r) {
385             CV_Assert(!empty());
386             CV_Assert(rank() >= r);
387             CV_Assert(std::all_of(std::begin(shape), std::end(shape) - r, [](size_type x){ return x == 1; }));
388             std::copy(std::end(shape) - r, std::end(shape), std::begin(shape));
389             shape.resize(r);
390         }
391 
392         /** @brief unsqueezes the tensor
393          *
394          * adds a axis of unit size at the requested before the specified axis
395          *
396          * Pre-conditions:
397          * - the tensor must be non-empty
398          * - the tensor's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
399          *
400          * Exception Guarantee: Strong
401          */
unsqueeze(int axis=0)402         void unsqueeze(int axis = 0) {
403             CV_Assert(!empty());
404             CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
405             axis = clamp_axis(axis, rank());
406             CV_Assert(axis >= 0 && axis < rank());
407             shape.insert(std::begin(shape) + axis, 1);
408         }
409 
operator Span<T>()410         operator Span<T>() noexcept { return Span<T>(data.get(), size()); }
operator View<T>() const411         operator View<T>() const noexcept { return View<T>(data.get(), size()); }
412 
swap(Tensor & lhs,Tensor & rhs)413         friend void swap(Tensor& lhs, Tensor& rhs) noexcept {
414             using std::swap;
415             swap(lhs.data, rhs.data);
416             swap(lhs.shape, rhs.shape);
417         }
418 
419     private:
420         cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
421         ManagedPtr<value_type> data;
422     };
423 
424     /** @brief provides a non-owning mutable span of a Tensor
425      *
426      * \tparam  T       type of data stored by the tensor
427      *
428      * A span is valid if and only if the following hold true:
429      * - span is non-empty
430      * - spanned memory is still allocated
431      *
432      * A span may be used if and only if it is valid.
433      */
434     template <class T>
435     class TensorSpan {
436     public:
437         using value_type    = typename Tensor<T>::value_type;
438         using pointer       = typename Tensor<T>::pointer;
439         using const_pointer = typename Tensor<T>::const_pointer;
440         using size_type     = typename Tensor<T>::size_type;
441 
TensorSpan()442         TensorSpan() noexcept : ptr{ nullptr } { }
443         TensorSpan(const TensorSpan&) noexcept = default;
/** @brief constructs a span over a tensor
 *
 * The span stores the pointer returned by `tensor.get()` and copies every axis length of the tensor.
 * `tensor.rank()` is queried, hence the tensor must be non-empty.
 */
TensorSpan(Tensor<T>& tensor) noexcept : ptr{ tensor.get() } {
    const auto num_axes = tensor.rank();
    shape.resize(num_axes);

    int axis = 0;
    while (axis < num_axes) {
        shape[axis] = tensor.get_axis_size(axis);
        ++axis;
    }
}
450 
451         template <class ForwardItr>
TensorSpan(pointer ptr_,ForwardItr start,ForwardItr end)452         TensorSpan(pointer ptr_, ForwardItr start, ForwardItr end) : ptr{ ptr_ } {
453             CV_Assert(start != end);
454             CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);
455 
456             using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
__anon31b2ff530302(ItrValueType x) 457             if (std::any_of(start, end, [](ItrValueType x) { return x <= 0; })) {
458                 CV_Error(Error::StsBadArg, "the given shape contains negative or zero size");
459             }
460 
461             shape.assign(start, end);
462         }
463 
464         /** creates a subspan of a tensor (or span); refer to subspan method for more details */
465         template <class... Args>
TensorSpan(TensorSpan other,size_type offset,Args &&...args)466         TensorSpan(TensorSpan other, size_type offset, Args&&... args)
467             : TensorSpan(other.subspan(offset, std::forward<Args>(args)...)) { }
468 
469         /** returns true if the span is empty */
empty() const470         bool empty() const noexcept { return shape.size() == 0; }
471 
472         /** returns the total number of elements in the span
473          *
474          * Pre-conditions:
475          * - span must be non-empty
476          */
size() const477         size_type size() const noexcept {
478             CV_Assert(!empty());
479             return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
480         }
481 
482         /** returns the rank of the span
483          *
484          * Pre-conditions:
485          * - span must be non-empty
486          */
rank() const487         size_type rank() const noexcept {
488             CV_Assert(!empty());
489             return shape.size();
490         }
491 
492         /** @brief returns the length of the axis
493          *
494          * Every axis is assigned a zero-based index which can be used to select an axis.
495          * Negative index can be used to select an axis from the end.
496          *
497          * Examples:
498          * > -1 represents the last axis
499          * > 0 represents the first axis
500          * > 1 represents the second axis
501          *
502          * Pre-conditions:
503          * - span must be non-empty
504          * - the axis must be in the range [-rank(), rank())
505          */
get_axis_size(int axis) const506         size_type get_axis_size(int axis) const noexcept {
507             axis = clamp_axis(axis, rank());
508             CV_Assert(axis >= 0 && axis < rank());
509             return shape[axis];
510         }
511 
512         /** @brief returns the combined size of the axes in an axis range
513          *
514          * if the shape is [3 x 5 x 7 x 11]
515          * - `size_range(0, 2)` will return 3 x 5 = 15
516          * - `size_range(1, 3)` will return 5 x 7 = 35
517          * - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
518          *
519          * Pre-conditions:
520          * - span must be non-empty
521          * - `axis_start` must be less than or equal to `axis_end`
522          * - `axis_end` must be less than or equal to the rank
523          *
524          * returns one if the two `axis_start` and `axis_end` are equal
525          */
size_range(size_type axis_start,size_type axis_end) const526         size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
527             CV_Assert(!empty());
528             CV_Assert(axis_start <= axis_end);
529             CV_Assert(axis_end <= rank());
530             auto start = std::begin(shape) + axis_start;
531             auto end = std::begin(shape) + axis_end;
532             return std::accumulate(start, end, 1, std::multiplies<size_type>());
533         }
534 
535         /** returns an std::vector containing axis lengths starting from axis zero
536          *
537          * Pre-conditions:
538          * - span must be non-empty
539          *
540          * Exception Guarantee: Strong
541          */
shape_as_vector() const542         std::vector<size_type> shape_as_vector() const {
543             CV_Assert(!empty());
544             return std::vector<size_type>(std::begin(shape), std::end(shape));
545         }
546 
547         /** returns a pointer to mutable device memory */
get() const548         pointer get() const noexcept { return ptr; }
549 
550         /** @brief clears the span
551          *
552          * Pre-conditions:
553          * - span must be non-empty
554          *
555          * Exception Guarantee: Strong
556          */
clear()557         void clear() noexcept {
558             CV_Assert(!empty());
559             ptr = nullptr;
560             shape.clear();
561         }
562 
563         /** @brief reshapes the span
564          *
565          * Length deduction:
566          * The length of at most one axis can be deduced using the total size constraint. The axis can
567          * be marked for deduction by specifying the corresponding size as -1.
568          *
569          * The axes for which no size was provided (excluding -1) will be assumed to be one.
570          *
571          * Pre-conditions:
572          * - the span must be non-empty
573          * - [start, end) represents a forward range containing the length of the axes in order
574          * - the number of axis lengths must be less than or equal to the rank
575          * - at most one axis length is allowed for length deduction
576          * - the lengths provided must ensure that the total number of elements remains unchanged
577          *
578          * Exception Guarantee: Strong
579          */
580         template <class ForwardItr>
581         typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
reshape(ForwardItr start,ForwardItr end)582         ::type reshape(ForwardItr start, ForwardItr end) {
583             CV_Assert(start != end);
584             CV_Assert(std::distance(start, end) <= rank());
585 
586             using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
587 
588             /* the user may leave at most one axis size for deduction by specifying -1 */
589             auto sizes_to_deduce = std::count(start, end, -1);
590             if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }
591 
592             /* sizes must be positive numbers with the exception of -1 */
593             auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
594                 return !(x > 0 || x == -1);
595             });
596             if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
597 
598             /* compute the total number of elements in the new tensor */
599             size_type unknown_size = 0;
600             auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
601             if (total < 0) {
602                 /* there is an unknown size */
603                 if (std::abs(total) <= size()) {
604                     unknown_size = size() / std::abs(total);
605                     total = size();
606                 }
607                 /* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
608                 ** Since `total` is negative, the size check which follows will fail and throw an error
609                 */
610             }
611 
612             /* the number of elements before and after reshape must be exactly same */
613             if (total != size()) {
614                CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
615             }
616 
617             /* we assume the size of the unspecified axes to be one */
618             std::fill(std::begin(shape), std::end(shape), 1);
619             std::copy_backward(start, end, std::end(shape));
620 
621             /* replace the unknown axis with the correct value */
622             std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
623         }
624 
625         /** @brief reshapes the tensor
626          * constructs a range out of the arguments and invokes the range-based reshape method
627          */
628         template <class ...Sizes>
reshape(Sizes...new_sizes_)629         void reshape(Sizes... new_sizes_) {
630             static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "unsupported tensor rank");
631             static_assert(sizeof...(Sizes) > 0, "no sizes provided");
632             std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
633             reshape(std::begin(new_sizes), std::end(new_sizes));
634         }
635 
636         /** @brief reshapes the span
637          *
638          * Pre-conditions:
639          * - the reference tensor/span/view must be non-empty
640          * - the reference tensor/span/view's rank must be less than or equal to the rank of the span
641          *
642          * Exception Guarantee: Strong
643          */
644         template <class TensorType>
reshape_as(const TensorType & tensor)645         void reshape_as(const TensorType& tensor) {
646             CV_Assert(!tensor.empty());
647             cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
648             for (int i = 0; i < new_sizes.size(); i++)
649                 new_sizes[i] = tensor.get_axis_size(i);
650             reshape(std::begin(new_sizes), std::end(new_sizes));
651         }
652 
653         /** @brief squeezes the tensor
654          *
655          * removes all axes of unit size
656          *
657          * Pre-conditions:
658          * - the span must be non-empty
659          * - the span's rank must be at least two
660          *
661          * Exception Guarantee: Strong
662          */
squeeze()663         void squeeze() {
664             CV_Assert(!empty());
665             CV_Assert(rank() >= 2);
666             auto itr = std::remove(std::begin(shape), std::end(shape), 1);
667             shape.resize(itr - std::begin(shape));
668         }
669 
670         /** @brief squeezes the tensor
671          *
672          * removes the specified axis if the axis length is one; otherwise, ignores the request
673          *
674          * Pre-conditions:
675          * - the span must be non-empty
676          * - the span's rank must be at least two
677          *
678          * Exception Guarantee: Strong
679          */
squeeze(int axis)680         void squeeze(int axis) {
681             CV_Assert(!empty());
682             CV_Assert(rank() >= 2);
683             axis = clamp_axis(axis, rank());
684             CV_Assert(axis >= 0 && axis < rank());
685             shape.erase(std::begin(shape) + axis);
686         }
687 
688         /** @brief squeezes the tensor
689          *
690          * removes leading singleton axes until the tensor's rank is equal to the requested rank
691          *
692          * Pre-conditions:
693          * - the tensor must be non-empty
694          * - the tensor's rank must be at least two
695          * - the tensor's rank must be at least the requested rank
696          * - the tensor must be squeezable up to the requested rank
697          *
698          * Exception Guarantee: Strong
699          */
squeeze_to(int r)700         void squeeze_to(int r) {
701             CV_Assert(!empty());
702             CV_Assert(rank() >= r);
703             CV_Assert(std::all_of(std::begin(shape), std::end(shape) - r, [](size_type x){ return x == 1; }));
704             std::copy(std::end(shape) - r, std::end(shape), std::begin(shape));
705             shape.resize(r);
706         }
707 
708         /** @brief unsqueezes the tensor
709          *
710          * adds a axis of unit size at the requested before the specified axis
711          *
712          * Pre-conditions:
713          * - the span must be non-empty
714          * - the span's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
715          *
716          * Exception Guarantee: Strong
717          */
unsqueeze(int axis=0)718         void unsqueeze(int axis = 0) {
719             CV_Assert(!empty());
720             CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
721             axis = clamp_axis(axis, rank());
722             CV_Assert(axis >= 0 && axis < rank());
723             shape.insert(std::begin(shape) + axis, 1);
724         }
725 
726         /** @brief obtains a subspan of the span
727          *
728          * Pre-conditions:
729          * - the span must be non-empty
730          * - the `offset` must be less than the size of the span
731          * - [start, end) represents a forward range containing length of the subspan axes
732          * - the lengths provided must ensure that the number of elements does not exceed (old size - offset)
733          *
734          * Exception Guarantee: Strong
735          */
736         template <class ForwardItr>
737         typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, TensorSpan>
subspan(size_type offset,ForwardItr start,ForwardItr end) const738         ::type subspan(size_type offset, ForwardItr start, ForwardItr end) const {
739             CV_Assert(start != end);
740             CV_Assert(std::distance(start, end) <= rank());
741 
742             auto cur_size = size();
743             CV_Assert(offset < cur_size);
744 
745             using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
746 
747             /* sizes must be positive numbers */
748             auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
749                 return !(x > 0);
750             });
751             if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
752 
753             /* the number of elements must be equal to the new size */
754             auto max_size = (cur_size - offset);
755             auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
756             if (total > max_size) {
757                 CV_Error(Error::StsBadArg, "axis lengths lead to OOB accesses");
758             }
759 
760             TensorSpan temp;
761             temp.shape.assign(start, end);
762             temp.ptr = ptr + offset;
763             return temp;
764         }
765 
766         /** @brief obtains a subspan of the span
767          * constructs a range out of the size arguments and invokes the range-based subspan method
768          */
769         template <class ...Sizes>
subspan(size_type offset,Sizes...new_sizes_) const770         TensorSpan subspan(size_type offset, Sizes... new_sizes_) const {
771             static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
772             static_assert(sizeof...(Sizes) > 0, "no sizes provided");
773             std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
774             return subspan(offset, std::begin(new_sizes), std::end(new_sizes));
775         }
776 
operator Span<T>()777         operator Span<T>() noexcept { return Span<T>(ptr, size()); }
operator View<T>() const778         operator View<T>() const noexcept { return View<T>(ptr, size()); }
779 
swap(TensorSpan & lhs,TensorSpan & rhs)780         friend void swap(TensorSpan& lhs, TensorSpan& rhs) noexcept {
781             using std::swap;
782             swap(lhs.ptr, rhs.ptr);
783             swap(lhs.shape, rhs.shape);
784         }
785 
786     private:
787         cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
788         pointer ptr;
789     };
790 
791     /** @brief view of a tensor
792      *
793      * \tparam  T       type of data stored by the tensor
794      *
795      * A view is valid if and only if the following hold true:
796      * - view is non-empty
797      * - viewed memory is still allocated
798      */
799     template <class T>
800     class TensorView {
801     public:
802         using value_type    = typename Tensor<T>::value_type;
803         using pointer       = typename Tensor<T>::pointer;
804         using const_pointer = typename Tensor<T>::const_pointer;
805         using size_type     = typename Tensor<T>::size_type;
806 
TensorView()807         TensorView() noexcept : ptr{ nullptr } { }
808         TensorView(const TensorView&) noexcept = default;
TensorView(TensorSpan<T> other)809         TensorView(TensorSpan<T> other) noexcept : ptr{ other.get() } {
810             const auto rank = other.rank();
811             shape.resize(rank);
812             for (int i = 0; i < rank; i++)
813                 shape[i] = other.get_axis_size(i);
814         }
TensorView(const Tensor<T> & tensor)815         TensorView(const Tensor<T>& tensor) noexcept : ptr{ tensor.get() } {
816             const auto rank = tensor.rank();
817             shape.resize(rank);
818             for (int i = 0; i < rank; i++)
819                 shape[i] = tensor.get_axis_size(i);
820         }
821 
822         template <class ForwardItr>
TensorView(const_pointer ptr_,ForwardItr start,ForwardItr end)823         TensorView(const_pointer ptr_, ForwardItr start, ForwardItr end) : ptr{ ptr_ } {
824             CV_Assert(start != end);
825             CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);
826 
827             using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
__anon31b2ff530702(ItrValueType x) 828             if (std::any_of(start, end, [](ItrValueType x) { return x <= 0; })) {
829                 CV_Error(Error::StsBadArg, "the given shape contains negative or zero size");
830             }
831 
832             shape.assign(start, end);
833         }
834 
835         /** creates a subview of a tensor (or span or view); refer to subview method for more details */
836         template <class... Args>
TensorView(TensorView other,size_type offset,Args &&...args)837         TensorView(TensorView other, size_type offset, Args&&... args) noexcept
838             : TensorView(other.subview(offset, std::forward<Args>(args)...)) { }
839 
840         TensorView& operator=(const TensorView&) = default;
operator =(TensorSpan<T> other)841         TensorView& operator=(TensorSpan<T> other) noexcept {
842             TensorView tmp(other);
843             swap(*this, tmp);
844             return *this;
845         }
846 
847         /** returns true if the view is empty */
empty() const848         bool empty() const noexcept { return shape.size() == 0; }
849 
850         /** returns the total number of elements in the view
851          *
852          * Pre-conditions:
853          * - view must be non-empty
854          */
size() const855         size_type size() const noexcept {
856             CV_Assert(!empty());
857             return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
858         }
859 
860         /** returns the rank of the view
861          *
862          * Pre-conditions:
863          * - view must be non-empty
864          */
rank() const865         size_type rank() const noexcept {
866             CV_Assert(!empty());
867             return shape.size();
868         }
869 
870         /** @brief returns the length of the axis
871          *
872          * Every axis is assigned a zero-based index which can be used to select an axis.
873          * Negative index can be used to select an axis from the end.
874          *
875          * Examples:
876          * > -1 represents the last axis
877          * > 0 represents the first axis
878          * > 1 represents the second axis
879          *
880          * Pre-conditions:
881          * - view must be non-empty
882          * - the axis must be in the range [-rank(), rank())
883          */
get_axis_size(int axis) const884         size_type get_axis_size(int axis) const noexcept {
885             axis = clamp_axis(axis, rank());
886             CV_Assert(axis >= 0 && axis < rank());
887             return shape[axis];
888         }
889 
890         /** @brief returns the combined size of the axes in an axis range
891          *
892          * if the shape is [3 x 5 x 7 x 11]
893          * - `size_range(0, 2)` will return 3 x 5 = 15
894          * - `size_range(1, 3)` will return 5 x 7 = 35
895          * - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
896          *
897          * Pre-conditions:
898          * - view must be non-empty
899          * - `axis_start` must be less than or equal to `axis_end`
900          * - `axis_end` must be less than or equal to the rank
901          *
902          * returns one if the two `axis_start` and `axis_end` are equal
903          */
size_range(size_type axis_start,size_type axis_end) const904         size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
905             CV_Assert(!empty());
906             CV_Assert(axis_start <= axis_end);
907             CV_Assert(axis_end <= rank());
908             auto start = std::begin(shape) + axis_start;
909             auto end = std::begin(shape) + axis_end;
910             return std::accumulate(start, end, 1, std::multiplies<size_type>());
911         }
912 
913         /** returns an std::vector containing axis lengths starting from axis zero
914          *
915          * Pre-conditions:
916          * - view must be non-empty
917          *
918          * Exception Guarantee: Strong
919          */
shape_as_vector() const920         std::vector<size_type> shape_as_vector() const {
921             CV_Assert(!empty());
922             return std::vector<size_type>(std::begin(shape), std::end(shape));
923         }
924 
925         /** returns a device pointer to immutable device memory */
get() const926         const_pointer get() const noexcept { return ptr; }
927 
928         /** @brief reshapes the view
929          *
930          * Length deduction:
931          * The length of at most one axis can be deduced using the total size constraint. The axis can
932          * be marked for deduction by specifying the size as -1.
933          *
934          * The axes for which no size was provided (excluding -1) will be assumed to be one.
935          *
936          * Pre-conditions:
937          * - view must be non-empty
938          * - [start, end) represents a forward range containing length of the axes in order starting from axis zero
939          * - the number of axis lengths must be less than or equal to the tensor rank
940          * - at most one axis length is allowed for length deduction
941          * - the lengths provided must ensure that the total number of elements remains unchanged
942          *
943          * Exception Guarantee: Strong
944          */
945         template <class ForwardItr>
946         typename std::enable_if<!std::is_integral<ForwardItr>::value, void>
reshape(ForwardItr start,ForwardItr end)947         ::type reshape(ForwardItr start, ForwardItr end) {
948             CV_Assert(start != end);
949             CV_Assert(std::distance(start, end) <= rank());
950 
951             using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
952 
953             /* the user may leave at most one axis size for deduction by specifying -1 */
954             auto sizes_to_deduce = std::count(start, end, -1);
955             if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }
956 
957             /* sizes must be positive numbers with the exception of -1 */
958             auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
959                 return !(x > 0 || x == -1);
960             });
961             if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
962 
963             /* compute the total number of elements in the new tensor */
964             size_type unknown_size = 0;
965             auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
966             if (total < 0) {
967                 /* there is an unknown size */
968                 if (std::abs(total) <= size()) {
969                     unknown_size = size() / std::abs(total);
970                     total = size();
971                 }
972                 /* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
973                 ** Since `total` is negative, the size check which follows will fail and throw an error
974                 */
975             }
976 
977             /* the number of elements before and after reshape must be exactly same */
978             if (total != size()) {
979                 CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
980             }
981 
982             /* we assume the size of the unspecified axes to be one */
983             std::fill(std::begin(shape), std::end(shape), 1);
984             std::copy_backward(start, end, std::end(shape));
985 
986             /* replace the unknown axis with the correct value */
987             std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
988         }
989 
990         /** @brief reshapes the view
991          * constructs a range out of the arguments and invokes the range-based reshape method
992          */
993         template <class ...Sizes>
reshape(Sizes...new_sizes_)994         void reshape(Sizes... new_sizes_) {
995             static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
996             static_assert(sizeof...(Sizes) > 0, "no sizes provided");
997             std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
998             reshape(std::begin(new_sizes), std::end(new_sizes));
999         }
1000 
1001         /** @brief reshapes the view
1002          *
1003          * Pre-conditions:
1004          * - the reference tensor/span/view must be non-empty
1005          * - the reference tensor/span/view's rank must be less than or equal to the rank of the view
1006          *
1007          * Exception Guarantee: Strong
1008          */
1009         template <class TensorType>
reshape_as(const TensorType & tensor)1010         void reshape_as(const TensorType& tensor) {
1011             CV_Assert(!tensor.empty());
1012             cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
1013             for (int i = 0; i < new_sizes.size(); i++)
1014                 new_sizes[i] = tensor.get_axis_size(i);
1015             reshape(std::begin(new_sizes), std::end(new_sizes));
1016         }
1017 
1018         /** @brief squeezes the tensor
1019          *
1020          * removes all axes of unit size
1021          *
1022          * Pre-conditions:
1023          * - the view must be non-empty
1024          * - the view's rank must be at least two
1025          *
1026          * Exception Guarantee: Strong
1027          */
squeeze()1028         void squeeze() {
1029             CV_Assert(!empty());
1030             CV_Assert(rank() >= 2);
1031             auto itr = std::remove(std::begin(shape), std::end(shape), 1);
1032             shape.resize(itr - std::begin(shape));
1033         }
1034 
1035         /** @brief squeezes the tensor
1036          *
1037          * removes the specified axis if the axis length is one; otherwise, ignores the request
1038          *
1039          * Pre-conditions:
1040          * - the view must be non-empty
1041          * - the view's rank must be at least two
1042          *
1043          * Exception Guarantee: Strong
1044          */
squeeze(int axis)1045         void squeeze(int axis) {
1046             CV_Assert(!empty());
1047             CV_Assert(rank() >= 2);
1048             axis = clamp_axis(axis, rank());
1049             CV_Assert(axis >= 0 && axis < rank());
1050             shape.erase(std::begin(shape) + axis);
1051         }
1052 
1053         /** @brief squeezes the tensor
1054          *
1055          * removes leading singleton axes until the tensor's rank is equal to the requested rank
1056          *
1057          * Pre-conditions:
1058          * - the tensor must be non-empty
1059          * - the tensor's rank must be at least two
1060          * - the tensor's rank must be at least the requested rank
1061          * - the tensor must be squeezable up to the requested rank
1062          *
1063          * Exception Guarantee: Strong
1064          */
squeeze_to(int r)1065         void squeeze_to(int r) {
1066             CV_Assert(!empty());
1067             CV_Assert(rank() >= r);
1068             CV_Assert(std::all_of(std::begin(shape), std::end(shape) - r, [](size_type x){ return x == 1; }));
1069             std::copy(std::end(shape) - r, std::end(shape), std::begin(shape));
1070             shape.resize(r);
1071         }
1072 
1073         /** @brief unsqueezes the tensor
1074          *
1075          * adds a axis of unit size at the requested before the specified axis
1076          *
1077          * Pre-conditions:
1078          * - the view must be non-empty
1079          * - the view's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
1080          *
1081          * Exception Guarantee: Strong
1082          */
unsqueeze(int axis=0)1083         void unsqueeze(int axis = 0) {
1084             CV_Assert(!empty());
1085             CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
1086             axis = clamp_axis(axis, rank());
1087             CV_Assert(axis >= 0 && axis < rank());
1088             shape.insert(std::begin(shape) + axis, 1);
1089         }
1090 
1091         /** @brief obtains a subview of the view
1092          *
1093          * The axes for which no size was provided will be assumed to be one.
1094          *
1095          * Pre-conditions:
1096          * - the view must be non-empty
1097          * - the `offset` must be less than the size of the view
1098          * - [start, end) represents a forward range containing length of the subview axes in order
1099          * - the number of axis lengths provided must be less than or equal to the tensor rank
1100          * - the lengths provided must ensure that the number of elements does not exceed (old size - offset)
1101          *
1102          * Exception Guarantee: Strong
1103          */
1104         template <class ForwardItr>
1105         typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, TensorView>
subview(size_type offset,ForwardItr start,ForwardItr end) const1106         ::type subview(size_type offset, ForwardItr start, ForwardItr end) const {
1107             CV_Assert(start != end);
1108             CV_Assert(std::distance(start, end) <= rank());
1109 
1110             auto cur_size = size();
1111             CV_Assert(offset < cur_size);
1112 
1113             using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
1114 
1115             /* sizes must be positive numbers */
1116             auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
1117                 return !(x > 0);
1118             });
1119             if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }
1120 
1121             /* the number of elements must be equal to the new size */
1122             auto max_size = (cur_size - offset);
1123             auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
1124             if (total > max_size) {
1125                 CV_Error(Error::StsBadArg, "axes lengths lead to OOB accesses");
1126             }
1127 
1128             TensorView temp;
1129             temp.shape.assign(start, end);
1130             temp.ptr = ptr + offset;
1131             return temp;
1132         }
1133 
1134         /** @brief obtains a subview of the view
1135          * constructs a range out of the size arguments and invokes the range-based subview method
1136          */
1137         template <class ...Sizes>
subview(size_type offset,Sizes...new_sizes_) const1138         TensorView subview(size_type offset, Sizes... new_sizes_) const {
1139             static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
1140             static_assert(sizeof...(Sizes) > 0, "no sizes provided");
1141             std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
1142             return subview(offset, std::begin(new_sizes), std::end(new_sizes));
1143         }
1144 
operator View<T>() const1145         operator View<T>() const noexcept { return View<T>(ptr, size()); }
1146 
swap(TensorView & lhs,TensorView & rhs)1147         friend void swap(TensorView& lhs, TensorView& rhs) noexcept {
1148             using std::swap;
1149             swap(lhs.ptr, rhs.ptr);
1150             swap(lhs.shape, rhs.shape);
1151         }
1152 
1153     private:
1154         cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
1155         const_pointer ptr;
1156     };
1157 
1158     /** returns true if the two TensorType objects have the same shape */
1159     template <class TensorType1, class TensorType2>
is_shape_same(const TensorType1 & x,const TensorType2 & y)1160     bool is_shape_same(const TensorType1& x, const TensorType2& y) noexcept {
1161         auto rank1 = x.rank();
1162         auto rank2 = y.rank();
1163 
1164         if (rank1 != rank2)
1165             return false;
1166 
1167         for (int i = 0; i < rank1; i++)
1168             if (x.get_axis_size(i) != y.get_axis_size(i))
1169                 return false;
1170         return true;
1171     }
1172 
1173     /** returns true if the two TensorType objects are compatible */
1174     template <class TensorType1, class TensorType2>
is_shape_compatible(const TensorType1 & x,const TensorType2 & y)1175     bool is_shape_compatible(const TensorType1& x, const TensorType2& y) noexcept {
1176         const auto rank1 = x.rank();
1177         const auto rank2 = y.rank();
1178 
1179         /* mathematically not required but is a technically required */
1180         if (rank1 != rank2)
1181             return false;
1182 
1183         for (int i = 0; i < rank1; i++)
1184             if (x.get_axis_size(i) != y.get_axis_size(i) &&
1185                 x.get_axis_size(i) != 1 && y.get_axis_size(i) != 1)
1186                 return false;
1187         return true;
1188     }
1189 
1190     /** returns the rank to which the given tensor can be squeezed to */
1191     template <class TensorType>
get_effective_rank(const TensorType & x)1192     std::size_t get_effective_rank(const TensorType& x) noexcept {
1193         const auto rank = x.rank();
1194         auto effective_rank = rank;
1195         for (int i = 0; i < rank; i++, effective_rank--)
1196             if (x.get_axis_size(i) != 1)
1197                 break;
1198         return effective_rank;
1199     }
1200 
1201 }}}} /* namespace cv::dnn::cuda4dnn::csl */
1202 
1203 #endif /* OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP */
1204