// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.

#ifndef OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP
#define OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP

#include "nvcc_defs.hpp"
#include "memory.hpp"
#include "cublas.hpp"
#include "cudnn.hpp"
#include "span.hpp"

#include "../cxx_utils/resizable_static_array.hpp"
#include "../cxx_utils/is_iterator.hpp"

#include <opencv2/core.hpp>

#include <cstddef>
#include <cstdint>
#include <type_traits>
#include <array>
#include <functional>
#include <algorithm>
#include <numeric>
#include <iterator>
#include <vector>
#include <utility>

#ifndef CSL_MAX_TENSOR_RANK
#define CSL_MAX_TENSOR_RANK 6
#endif

namespace cv { namespace dnn { namespace cuda4dnn { namespace csl {

    /** \file tensor.hpp
     *
     *     TYPE     | OWNERSHIP | MUTABLE
     * ------------ + --------- + --------
     *    Tensor    |    Yes    |   Yes
     *  TensorSpan  |    No     |   Yes
     *  TensorView  |    No     |   No
     *
     * Tensor is implicitly convertible to TensorSpan and TensorView
     * TensorSpan is implicitly convertible to TensorView
     *
     * Concepts and template parameter naming convention:
     * - "MutableTensorType" can refer to a Tensor or TensorSpan
     * - "ImmutableTensorType" can refer to a Tensor, TensorSpan or TensorView
     * - "TensorType" can refer to a Tensor, TensorSpan or TensorView
     *
     * "ImmutableTensorType" is used when the tensor data might be used.
     * "TensorType" is used when only meta-information such as the size or shape is required, i.e. the data won't be touched
     */

    /** if the \p axis is a negative index, the equivalent positive index is returned; otherwise, returns \p axis */
    CUDA4DNN_HOST_DEVICE constexpr std::size_t clamp_axis(int axis, std::size_t rank) {
        return axis < 0 ? axis + rank : axis;
    }

    /** @brief multi-dimensional contiguous non-copyable GPU tensor
     *
     * \tparam  T       type of data stored
     *
     * @note scalars or zero rank tensors are not supported
     * @note the maximum rank supported is controlled by the `CSL_MAX_TENSOR_RANK` preprocessor symbol
     */
    template <class T>
    class Tensor {
        static_assert(std::is_standard_layout<T>::value, "T must satisfy StandardLayoutType");

    public:
        using value_type    = typename ManagedPtr<T>::element_type;
        using pointer       = typename ManagedPtr<value_type>::pointer;
        using const_pointer = typename ManagedPtr<value_type>::const_pointer;
        using size_type     = typename ManagedPtr<value_type>::size_type;

        Tensor() noexcept { }
        Tensor(const Tensor&) = delete;
        Tensor(Tensor&& other) noexcept {
            data = std::move(other.data);
            shape = other.shape;
            other.shape.clear();
        }

        /** @brief constructs a tensor of a specific shape
         *
         * Whatever arguments are accepted by the resize methods are accepted here.
         */
        template <class ...Args>
        Tensor(Args&&... sizes) { resize(std::forward<Args>(sizes)...); }
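
        /* Illustrative usage sketch (not part of the class): both constructor forms below
         * forward to resize(); the element type and shapes are hypothetical example values.
         *
         *     csl::Tensor<float> scores(2, 3, 4);                               // rank 3, 24 elements
         *
         *     std::vector<std::size_t> shape { 5, 7 };
         *     csl::Tensor<float> weights(std::begin(shape), std::end(shape));   // rank 2, 35 elements
         */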

        Tensor& operator=(const Tensor&) = delete;
        Tensor& operator=(Tensor&& other) noexcept {
            data = std::move(other.data);
            shape = other.shape;
            other.shape.clear();
            return *this;
        }

        /** returns true if the tensor is empty (or uninitialized) */
        bool empty() const noexcept { return shape.size() == 0; }

        /** returns the total number of elements in the tensor
         *
         * Pre-conditions:
         * - tensor must be non-empty
         */
        size_type size() const noexcept {
            CV_Assert(!empty());
            return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
        }

        /** returns the rank of the tensor
         *
         * Pre-conditions:
         * - tensor must be non-empty
         */
        size_type rank() const noexcept {
            CV_Assert(!empty());
            return shape.size();
        }

        /** @brief returns the length of the axis
         *
         * Every axis is assigned a zero-based index which can be used to select an axis.
         * Negative index can be used to select an axis from the end.
         *
         * Examples:
         * > -1 represents the last axis
         * > 0 represents the first axis
         * > 1 represents the second axis
         *
         * Pre-conditions:
         * - tensor must be non-empty
         * - the axis must be in the range [-rank(), rank())
         */
        size_type get_axis_size(int axis) const noexcept {
            axis = clamp_axis(axis, rank());
            CV_Assert(axis >= 0 && axis < rank());
            return shape[axis];
        }

        /** @brief returns the combined size of the axes in an axis range
         *
         * if the shape is [3 x 5 x 7 x 11]
         * - `size_range(0, 2)` will return 3 x 5 = 15
         * - `size_range(1, 3)` will return 5 x 7 = 35
         * - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
         *
         * Pre-conditions:
         * - tensor must be non-empty
         * - `axis_start` must be less than or equal to `axis_end`
         * - `axis_end` must be less than or equal to the rank
         *
         * returns one if `axis_start` and `axis_end` are equal
         */
        size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
            CV_Assert(!empty());
            CV_Assert(axis_start <= axis_end);
            CV_Assert(axis_end <= rank());
            auto start = std::begin(shape) + axis_start;
            auto end = std::begin(shape) + axis_end;
            return std::accumulate(start, end, 1, std::multiplies<size_type>());
        }

        /** returns an std::vector containing axis lengths starting from axis zero
         *
         * Pre-conditions:
         * - tensor must be non-empty
         *
         * Exception Guarantee: Strong
         */
        std::vector<size_type> shape_as_vector() const {
            CV_Assert(!empty());
            return std::vector<size_type>(std::begin(shape), std::end(shape));
        }
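
        /* Illustrative sketch (hypothetical values): querying shape information.
         * Assuming a tensor of shape [3 x 5 x 7 x 11]:
         *
         *     tensor.rank();              // 4
         *     tensor.get_axis_size(-1);   // 11 (negative indices count from the end)
         *     tensor.size_range(1, 3);    // 5 x 7 = 35
         *     tensor.size();              // 1155
         */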

        /** returns a pointer to mutable device memory owned by the tensor */
        pointer get() noexcept { return data.get(); }

        /** returns a pointer to immutable device memory owned by the tensor */
        const_pointer get() const noexcept { return data.get(); }

        /** @brief releases the memory owned by the tensor
         *
         * Pre-conditions:
         * - tensor must be non-empty
         *
         * Exception Guarantee: Strong
         */
        void clear() {
            CV_Assert(!empty());
            data.reset();
            shape.clear();
        }

        /** @brief resizes the tensor
         *
         * Pre-conditions:
         * - [start, end) represents a forward range containing the length of the axes in order starting from axis zero
         * - number of lengths provided must not exceed the maximum tensor rank (CSL_MAX_TENSOR_RANK)
         * - the sizes must be positive integers
         *
         * Exception Guarantee: Strong
         */
        template <class ForwardItr>
        typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
        ::type resize(ForwardItr start, ForwardItr end) {
            CV_Assert(start != end);
            CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);

            using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
            auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
            data.reset(total);

            shape.assign(start, end);
        }

        /** @brief resizes the tensor
         * constructs a range out of the arguments and invokes the range-based resize method
         */
        template <class ...Sizes>
        void resize(Sizes... new_sizes_) {
            static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
            static_assert(sizeof...(Sizes) > 0, "no sizes provided");
            std::array<size_type, sizeof...(Sizes)> new_sizes = { static_cast<size_type>(new_sizes_)... };
            resize(std::begin(new_sizes), std::end(new_sizes));
        }

        /** @brief resizes the tensor
         *
         * Pre-conditions:
         * - the reference tensor must be non-empty
         *
         * Exception Guarantee: Strong
         */
        template <class TensorType>
        void resize_as(const TensorType& tensor) {
            CV_Assert(!tensor.empty());
            cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
            for (int i = 0; i < new_sizes.size(); i++)
                new_sizes[i] = tensor.get_axis_size(i);
            resize(std::begin(new_sizes), std::end(new_sizes));
        }

        /** @brief reshapes the tensor
         *
         * Length deduction:
         * The length of at most one axis can be deduced using the total size constraint. The axis can
         * be marked for deduction by specifying the size as -1.
         *
         * The axes for which no size was provided (excluding -1) will be assumed to be one.
         *
         * Pre-conditions:
         * - the tensor must be non-empty
         * - [start, end) represents a forward range containing the length of the axes starting from axis zero
         * - the number of lengths provided must be less than or equal to the tensor rank
         * - at most one axis length is allowed for length deduction
         * - the lengths provided must ensure that the total number of elements remains unchanged
         *
         * Exception Guarantee: Strong
         */
        template <class ForwardItr>
        typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
        ::type reshape(ForwardItr start, ForwardItr end) {
            CV_Assert(start != end);
            CV_Assert(std::distance(start, end) <= rank());

            using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;

            /* the user may leave at most one axis size for deduction by specifying -1 */
            auto sizes_to_deduce = std::count(start, end, -1);
            if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }

            /* sizes must be positive numbers with the exception of -1 */
            auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
                return !(x > 0 || x == -1);
            });
            if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }

            /* compute the total number of elements in the new tensor */
            size_type unknown_size = 0;
            auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
            if (total < 0) {
                /* there is an unknown size */
                if (std::abs(total) <= size()) {
                    unknown_size = size() / std::abs(total);
                    total = size();
                }
                /* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
                ** Since `total` is negative, the size check which follows will fail and throw an error
                */
            }

            /* the number of elements before and after reshape must be exactly the same */
            if (total != size()) {
                CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
            }

            /* we assume the size of the unspecified axes to be one */
            std::fill(std::begin(shape), std::end(shape), 1);
            std::copy_backward(start, end, std::end(shape));

            /* replace the unknown axis with the correct value */
            std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
        }

        /** @brief reshapes the tensor
         * constructs a range out of the arguments and invokes the range-based reshape method
         */
        template <class ...Sizes>
        void reshape(Sizes... new_sizes_) {
            static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
            static_assert(sizeof...(Sizes) > 0, "no sizes provided");
            std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
            reshape(std::begin(new_sizes), std::end(new_sizes));
        }
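
        /* Illustrative sketch (hypothetical values): reshape with length deduction.
         * Assuming a tensor of shape [2 x 3 x 4] (24 elements):
         *
         *     tensor.reshape(6, 4);     // trailing axes are set; shape becomes [1 x 6 x 4]
         *     tensor.reshape(-1, 8);    // -1 is deduced as 3; shape becomes [1 x 3 x 8]
         */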

        /** @brief reshapes the tensor
         *
         * Pre-conditions:
         * - the reference tensor must be a non-empty tensor
         * - the reference tensor's rank must be less than or equal to the rank of the target tensor
         *
         * Exception Guarantee: Strong
         */
        template <class TensorType>
        void reshape_as(const TensorType& tensor) {
            CV_Assert(!tensor.empty());
            cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
            for (int i = 0; i < new_sizes.size(); i++)
                new_sizes[i] = tensor.get_axis_size(i);
            reshape(std::begin(new_sizes), std::end(new_sizes));
        }

        /** @brief squeezes the tensor
         *
         * removes all axes of unit size
         *
         * Pre-conditions:
         * - the tensor must be non-empty
         * - the tensor's rank must be at least two
         *
         * Exception Guarantee: Strong
         */
        void squeeze() {
            CV_Assert(!empty());
            CV_Assert(rank() >= 2);
            auto itr = std::remove(std::begin(shape), std::end(shape), 1);
            shape.resize(itr - std::begin(shape));
        }

        /** @brief squeezes the tensor
         *
         * removes the specified axis if the axis length is one; otherwise, ignores the request
         *
         * Pre-conditions:
         * - the tensor must be non-empty
         * - the tensor's rank must be at least two
         *
         * Exception Guarantee: Strong
         */
        void squeeze(int axis) {
            CV_Assert(!empty());
            CV_Assert(rank() >= 2);
            axis = clamp_axis(axis, rank());
            CV_Assert(axis >= 0 && axis < rank());
            shape.erase(std::begin(shape) + axis);
        }

        /** @brief squeezes the tensor
         *
         * removes leading singleton axes until the tensor's rank is equal to the requested rank
         *
         * Pre-conditions:
         * - the tensor must be non-empty
         * - the tensor's rank must be at least two
         * - the tensor's rank must be at least the requested rank
         * - the tensor must be squeezable up to the requested rank
         *
         * Exception Guarantee: Strong
         */
        void squeeze_to(int r) {
            CV_Assert(!empty());
            CV_Assert(rank() >= r);
            CV_Assert(std::all_of(std::begin(shape), std::end(shape) - r, [](size_type x){ return x == 1; }));
            std::copy(std::end(shape) - r, std::end(shape), std::begin(shape));
            shape.resize(r);
        }

        /** @brief unsqueezes the tensor
         *
         * adds an axis of unit size before the specified axis
         *
         * Pre-conditions:
         * - the tensor must be non-empty
         * - the tensor's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
         *
         * Exception Guarantee: Strong
         */
        void unsqueeze(int axis = 0) {
            CV_Assert(!empty());
            CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
            axis = clamp_axis(axis, rank());
            CV_Assert(axis >= 0 && axis < rank());
            shape.insert(std::begin(shape) + axis, 1);
        }

        operator Span<T>() noexcept { return Span<T>(data.get(), size()); }
        operator View<T>() const noexcept { return View<T>(data.get(), size()); }

        friend void swap(Tensor& lhs, Tensor& rhs) noexcept {
            using std::swap;
            swap(lhs.data, rhs.data);
            swap(lhs.shape, rhs.shape);
        }

    private:
        cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
        ManagedPtr<value_type> data;
    };
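
    /* Illustrative sketch (hypothetical values): axis manipulation on a Tensor.
     * Assuming a tensor of shape [1 x 3 x 1 x 5]:
     *
     *     tensor.squeeze();       // removes all unit axes; shape becomes [3 x 5]
     *     tensor.unsqueeze(0);    // inserts a unit axis in front; shape becomes [1 x 3 x 5]
     *     tensor.squeeze(-3);     // removes that axis again; shape becomes [3 x 5]
     */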

    /** @brief provides a non-owning mutable span of a Tensor
     *
     * \tparam  T       type of data stored by the tensor
     *
     * A span is valid if and only if the following hold true:
     * - span is non-empty
     * - spanned memory is still allocated
     *
     * A span may be used if and only if it is valid.
     */
    template <class T>
    class TensorSpan {
    public:
        using value_type    = typename Tensor<T>::value_type;
        using pointer       = typename Tensor<T>::pointer;
        using const_pointer = typename Tensor<T>::const_pointer;
        using size_type     = typename Tensor<T>::size_type;

        TensorSpan() noexcept : ptr{ nullptr } { }
        TensorSpan(const TensorSpan&) noexcept = default;
        TensorSpan(Tensor<T>& tensor) noexcept : ptr{ tensor.get() } {
            const auto rank = tensor.rank();
            shape.resize(rank);
            for (int i = 0; i < rank; i++)
                shape[i] = tensor.get_axis_size(i);
        }

        template <class ForwardItr>
        TensorSpan(pointer ptr_, ForwardItr start, ForwardItr end) : ptr{ ptr_ } {
            CV_Assert(start != end);
            CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);

            using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
            if (std::any_of(start, end, [](ItrValueType x) { return x <= 0; })) {
                CV_Error(Error::StsBadArg, "the given shape contains negative or zero size");
            }

            shape.assign(start, end);
        }

        /** creates a subspan of a tensor (or span); refer to subspan method for more details */
        template <class... Args>
        TensorSpan(TensorSpan other, size_type offset, Args&&... args)
            : TensorSpan(other.subspan(offset, std::forward<Args>(args)...)) { }

        /** returns true if the span is empty */
        bool empty() const noexcept { return shape.size() == 0; }

        /** returns the total number of elements in the span
         *
         * Pre-conditions:
         * - span must be non-empty
         */
        size_type size() const noexcept {
            CV_Assert(!empty());
            return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
        }

        /** returns the rank of the span
         *
         * Pre-conditions:
         * - span must be non-empty
         */
        size_type rank() const noexcept {
            CV_Assert(!empty());
            return shape.size();
        }

        /** @brief returns the length of the axis
         *
         * Every axis is assigned a zero-based index which can be used to select an axis.
         * Negative index can be used to select an axis from the end.
         *
         * Examples:
         * > -1 represents the last axis
         * > 0 represents the first axis
         * > 1 represents the second axis
         *
         * Pre-conditions:
         * - span must be non-empty
         * - the axis must be in the range [-rank(), rank())
         */
        size_type get_axis_size(int axis) const noexcept {
            axis = clamp_axis(axis, rank());
            CV_Assert(axis >= 0 && axis < rank());
            return shape[axis];
        }

        /** @brief returns the combined size of the axes in an axis range
         *
         * if the shape is [3 x 5 x 7 x 11]
         * - `size_range(0, 2)` will return 3 x 5 = 15
         * - `size_range(1, 3)` will return 5 x 7 = 35
         * - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
         *
         * Pre-conditions:
         * - span must be non-empty
         * - `axis_start` must be less than or equal to `axis_end`
         * - `axis_end` must be less than or equal to the rank
         *
         * returns one if `axis_start` and `axis_end` are equal
         */
        size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
            CV_Assert(!empty());
            CV_Assert(axis_start <= axis_end);
            CV_Assert(axis_end <= rank());
            auto start = std::begin(shape) + axis_start;
            auto end = std::begin(shape) + axis_end;
            return std::accumulate(start, end, 1, std::multiplies<size_type>());
        }

        /** returns an std::vector containing axis lengths starting from axis zero
         *
         * Pre-conditions:
         * - span must be non-empty
         *
         * Exception Guarantee: Strong
         */
        std::vector<size_type> shape_as_vector() const {
            CV_Assert(!empty());
            return std::vector<size_type>(std::begin(shape), std::end(shape));
        }

        /** returns a pointer to mutable device memory */
        pointer get() const noexcept { return ptr; }

        /** @brief clears the span
         *
         * Pre-conditions:
         * - span must be non-empty
         *
         * Exception Guarantee: Strong
         */
        void clear() noexcept {
            CV_Assert(!empty());
            ptr = nullptr;
            shape.clear();
        }

        /** @brief reshapes the span
         *
         * Length deduction:
         * The length of at most one axis can be deduced using the total size constraint. The axis can
         * be marked for deduction by specifying the corresponding size as -1.
         *
         * The axes for which no size was provided (excluding -1) will be assumed to be one.
         *
         * Pre-conditions:
         * - the span must be non-empty
         * - [start, end) represents a forward range containing the length of the axes in order
         * - the number of axis lengths must be less than or equal to the rank
         * - at most one axis length is allowed for length deduction
         * - the lengths provided must ensure that the total number of elements remains unchanged
         *
         * Exception Guarantee: Strong
         */
        template <class ForwardItr>
        typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, void>
        ::type reshape(ForwardItr start, ForwardItr end) {
            CV_Assert(start != end);
            CV_Assert(std::distance(start, end) <= rank());

            using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;

            /* the user may leave at most one axis size for deduction by specifying -1 */
            auto sizes_to_deduce = std::count(start, end, -1);
            if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }

            /* sizes must be positive numbers with the exception of -1 */
            auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
                return !(x > 0 || x == -1);
            });
            if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }

            /* compute the total number of elements in the new tensor */
            size_type unknown_size = 0;
            auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
            if (total < 0) {
                /* there is an unknown size */
                if (std::abs(total) <= size()) {
                    unknown_size = size() / std::abs(total);
                    total = size();
                }
                /* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
                ** Since `total` is negative, the size check which follows will fail and throw an error
                */
            }

            /* the number of elements before and after reshape must be exactly the same */
            if (total != size()) {
                CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
            }

            /* we assume the size of the unspecified axes to be one */
            std::fill(std::begin(shape), std::end(shape), 1);
            std::copy_backward(start, end, std::end(shape));

            /* replace the unknown axis with the correct value */
            std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
        }

        /** @brief reshapes the span
         * constructs a range out of the arguments and invokes the range-based reshape method
         */
        template <class ...Sizes>
        void reshape(Sizes... new_sizes_) {
            static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "unsupported tensor rank");
            static_assert(sizeof...(Sizes) > 0, "no sizes provided");
            std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
            reshape(std::begin(new_sizes), std::end(new_sizes));
        }

        /** @brief reshapes the span
         *
         * Pre-conditions:
         * - the reference tensor/span/view must be non-empty
         * - the reference tensor/span/view's rank must be less than or equal to the rank of the span
         *
         * Exception Guarantee: Strong
         */
        template <class TensorType>
        void reshape_as(const TensorType& tensor) {
            CV_Assert(!tensor.empty());
            cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
            for (int i = 0; i < new_sizes.size(); i++)
                new_sizes[i] = tensor.get_axis_size(i);
            reshape(std::begin(new_sizes), std::end(new_sizes));
        }

        /** @brief squeezes the tensor
         *
         * removes all axes of unit size
         *
         * Pre-conditions:
         * - the span must be non-empty
         * - the span's rank must be at least two
         *
         * Exception Guarantee: Strong
         */
        void squeeze() {
            CV_Assert(!empty());
            CV_Assert(rank() >= 2);
            auto itr = std::remove(std::begin(shape), std::end(shape), 1);
            shape.resize(itr - std::begin(shape));
        }

        /** @brief squeezes the tensor
         *
         * removes the specified axis if the axis length is one; otherwise, ignores the request
         *
         * Pre-conditions:
         * - the span must be non-empty
         * - the span's rank must be at least two
         *
         * Exception Guarantee: Strong
         */
        void squeeze(int axis) {
            CV_Assert(!empty());
            CV_Assert(rank() >= 2);
            axis = clamp_axis(axis, rank());
            CV_Assert(axis >= 0 && axis < rank());
            shape.erase(std::begin(shape) + axis);
        }

        /** @brief squeezes the tensor
         *
         * removes leading singleton axes until the tensor's rank is equal to the requested rank
         *
         * Pre-conditions:
         * - the tensor must be non-empty
         * - the tensor's rank must be at least two
         * - the tensor's rank must be at least the requested rank
         * - the tensor must be squeezable up to the requested rank
         *
         * Exception Guarantee: Strong
         */
        void squeeze_to(int r) {
            CV_Assert(!empty());
            CV_Assert(rank() >= r);
            CV_Assert(std::all_of(std::begin(shape), std::end(shape) - r, [](size_type x){ return x == 1; }));
            std::copy(std::end(shape) - r, std::end(shape), std::begin(shape));
            shape.resize(r);
        }

        /** @brief unsqueezes the tensor
         *
         * adds an axis of unit size before the specified axis
         *
         * Pre-conditions:
         * - the span must be non-empty
         * - the span's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
         *
         * Exception Guarantee: Strong
         */
        void unsqueeze(int axis = 0) {
            CV_Assert(!empty());
            CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
            axis = clamp_axis(axis, rank());
            CV_Assert(axis >= 0 && axis < rank());
            shape.insert(std::begin(shape) + axis, 1);
        }

        /** @brief obtains a subspan of the span
         *
         * Pre-conditions:
         * - the span must be non-empty
         * - the `offset` must be less than the size of the span
         * - [start, end) represents a forward range containing length of the subspan axes
         * - the lengths provided must ensure that the number of elements does not exceed (old size - offset)
         *
         * Exception Guarantee: Strong
         */
        template <class ForwardItr>
        typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, TensorSpan>
        ::type subspan(size_type offset, ForwardItr start, ForwardItr end) const {
            CV_Assert(start != end);
            CV_Assert(std::distance(start, end) <= rank());

            auto cur_size = size();
            CV_Assert(offset < cur_size);

            using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;

            /* sizes must be positive numbers */
            auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
                return !(x > 0);
            });
            if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }

            /* the number of elements must be equal to the new size */
            auto max_size = (cur_size - offset);
            auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
            if (total > max_size) {
                CV_Error(Error::StsBadArg, "axis lengths lead to OOB accesses");
            }

            TensorSpan temp;
            temp.shape.assign(start, end);
            temp.ptr = ptr + offset;
            return temp;
        }

        /** @brief obtains a subspan of the span
         * constructs a range out of the size arguments and invokes the range-based subspan method
         */
        template <class ...Sizes>
        TensorSpan subspan(size_type offset, Sizes... new_sizes_) const {
            static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
            static_assert(sizeof...(Sizes) > 0, "no sizes provided");
            std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
            return subspan(offset, std::begin(new_sizes), std::end(new_sizes));
        }

        operator Span<T>() noexcept { return Span<T>(ptr, size()); }
        operator View<T>() const noexcept { return View<T>(ptr, size()); }

        friend void swap(TensorSpan& lhs, TensorSpan& rhs) noexcept {
            using std::swap;
            swap(lhs.ptr, rhs.ptr);
            swap(lhs.shape, rhs.shape);
        }

    private:
        cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
        pointer ptr;
    };
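
    /* Illustrative sketch (hypothetical values): carving a subspan out of a span.
     * Assuming a span of shape [4 x 3 x 5] (60 elements):
     *
     *     auto block = span.subspan(15, 3, 5);   // starts 15 elements in, viewed as [3 x 5]
     *
     * The subspan aliases the same device memory; the sizes must fit within the
     * remaining (size() - offset) elements.
     */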

    /** @brief view of a tensor
     *
     * \tparam  T       type of data stored by the tensor
     *
     * A view is valid if and only if the following hold true:
     * - view is non-empty
     * - viewed memory is still allocated
     */
    template <class T>
    class TensorView {
    public:
        using value_type    = typename Tensor<T>::value_type;
        using pointer       = typename Tensor<T>::pointer;
        using const_pointer = typename Tensor<T>::const_pointer;
        using size_type     = typename Tensor<T>::size_type;

        TensorView() noexcept : ptr{ nullptr } { }
        TensorView(const TensorView&) noexcept = default;
        TensorView(TensorSpan<T> other) noexcept : ptr{ other.get() } {
            const auto rank = other.rank();
            shape.resize(rank);
            for (int i = 0; i < rank; i++)
                shape[i] = other.get_axis_size(i);
        }
        TensorView(const Tensor<T>& tensor) noexcept : ptr{ tensor.get() } {
            const auto rank = tensor.rank();
            shape.resize(rank);
            for (int i = 0; i < rank; i++)
                shape[i] = tensor.get_axis_size(i);
        }

        template <class ForwardItr>
        TensorView(const_pointer ptr_, ForwardItr start, ForwardItr end) : ptr{ ptr_ } {
            CV_Assert(start != end);
            CV_Assert(std::distance(start, end) <= CSL_MAX_TENSOR_RANK);

            using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;
            if (std::any_of(start, end, [](ItrValueType x) { return x <= 0; })) {
                CV_Error(Error::StsBadArg, "the given shape contains negative or zero size");
            }

            shape.assign(start, end);
        }

        /** creates a subview of a tensor (or span or view); refer to subview method for more details */
        template <class... Args>
        TensorView(TensorView other, size_type offset, Args&&... args) noexcept
            : TensorView(other.subview(offset, std::forward<Args>(args)...)) { }

        TensorView& operator=(const TensorView&) = default;
        TensorView& operator=(TensorSpan<T> other) noexcept {
            TensorView tmp(other);
            swap(*this, tmp);
            return *this;
        }

        /** returns true if the view is empty */
        bool empty() const noexcept { return shape.size() == 0; }

        /** returns the total number of elements in the view
         *
         * Pre-conditions:
         * - view must be non-empty
         */
        size_type size() const noexcept {
            CV_Assert(!empty());
            return std::accumulate(std::begin(shape), std::end(shape), 1, std::multiplies<size_type>());
        }

        /** returns the rank of the view
         *
         * Pre-conditions:
         * - view must be non-empty
         */
        size_type rank() const noexcept {
            CV_Assert(!empty());
            return shape.size();
        }

        /** @brief returns the length of the axis
         *
         * Every axis is assigned a zero-based index which can be used to select an axis.
         * Negative index can be used to select an axis from the end.
         *
         * Examples:
         * > -1 represents the last axis
         * > 0 represents the first axis
         * > 1 represents the second axis
         *
         * Pre-conditions:
         * - view must be non-empty
         * - the axis must be in the range [-rank(), rank())
         */
        size_type get_axis_size(int axis) const noexcept {
            axis = clamp_axis(axis, rank());
            CV_Assert(axis >= 0 && axis < rank());
            return shape[axis];
        }

        /** @brief returns the combined size of the axes in an axis range
         *
         * if the shape is [3 x 5 x 7 x 11]
         * - `size_range(0, 2)` will return 3 x 5 = 15
         * - `size_range(1, 3)` will return 5 x 7 = 35
         * - `size_range(0, 4)` will return 3 x 5 x 7 x 11 = 1155
         *
         * Pre-conditions:
         * - view must be non-empty
         * - `axis_start` must be less than or equal to `axis_end`
         * - `axis_end` must be less than or equal to the rank
         *
         * returns one if `axis_start` and `axis_end` are equal
         */
        size_type size_range(size_type axis_start, size_type axis_end) const noexcept {
            CV_Assert(!empty());
            CV_Assert(axis_start <= axis_end);
            CV_Assert(axis_end <= rank());
            auto start = std::begin(shape) + axis_start;
            auto end = std::begin(shape) + axis_end;
            return std::accumulate(start, end, 1, std::multiplies<size_type>());
        }

        /** returns an std::vector containing axis lengths starting from axis zero
         *
         * Pre-conditions:
         * - view must be non-empty
         *
         * Exception Guarantee: Strong
         */
        std::vector<size_type> shape_as_vector() const {
            CV_Assert(!empty());
            return std::vector<size_type>(std::begin(shape), std::end(shape));
        }

        /** returns a device pointer to immutable device memory */
        const_pointer get() const noexcept { return ptr; }

        /** @brief reshapes the view
         *
         * Length deduction:
         * The length of at most one axis can be deduced using the total size constraint. The axis can
         * be marked for deduction by specifying the size as -1.
         *
         * The axes for which no size was provided (excluding -1) will be assumed to be one.
         *
         * Pre-conditions:
         * - view must be non-empty
         * - [start, end) represents a forward range containing length of the axes in order starting from axis zero
         * - the number of axis lengths must be less than or equal to the tensor rank
         * - at most one axis length is allowed for length deduction
         * - the lengths provided must ensure that the total number of elements remains unchanged
         *
         * Exception Guarantee: Strong
         */
        template <class ForwardItr>
        typename std::enable_if<!std::is_integral<ForwardItr>::value, void>
        ::type reshape(ForwardItr start, ForwardItr end) {
            CV_Assert(start != end);
            CV_Assert(std::distance(start, end) <= rank());

            using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;

            /* the user may leave at most one axis size for deduction by specifying -1 */
            auto sizes_to_deduce = std::count(start, end, -1);
            if (sizes_to_deduce > 1) { CV_Error(Error::StsBadArg, "only one axis size can be deduced"); }

            /* sizes must be positive numbers with the exception of -1 */
            auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
                return !(x > 0 || x == -1);
            });
            if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }

            /* compute the total number of elements in the new tensor */
            size_type unknown_size = 0;
            auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
            if (total < 0) {
                /* there is an unknown size */
                if (std::abs(total) <= size()) {
                    unknown_size = size() / std::abs(total);
                    total = size();
                }
                /* Edge case: if `total` is already more than size(), skip the deduction as it's impossible
                ** Since `total` is negative, the size check which follows will fail and throw an error
                */
            }

            /* the number of elements before and after reshape must be exactly the same */
            if (total != size()) {
                CV_Error(Error::StsBadArg, "new axes do not preserve the tensor element count");
            }

            /* we assume the size of the unspecified axes to be one */
            std::fill(std::begin(shape), std::end(shape), 1);
            std::copy_backward(start, end, std::end(shape));

            /* replace the unknown axis with the correct value */
            std::replace(std::begin(shape), std::end(shape), size_type(-1), unknown_size);
        }

        /** @brief reshapes the view
         * constructs a range out of the arguments and invokes the range-based reshape method
         */
        template <class ...Sizes>
        void reshape(Sizes... new_sizes_) {
            static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
            static_assert(sizeof...(Sizes) > 0, "no sizes provided");
            std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
            reshape(std::begin(new_sizes), std::end(new_sizes));
        }

        /** @brief reshapes the view
         *
         * Pre-conditions:
         * - the reference tensor/span/view must be non-empty
         * - the reference tensor/span/view's rank must be less than or equal to the rank of the view
         *
         * Exception Guarantee: Strong
         */
        template <class TensorType>
        void reshape_as(const TensorType& tensor) {
            CV_Assert(!tensor.empty());
            cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> new_sizes(tensor.rank());
            for (int i = 0; i < new_sizes.size(); i++)
                new_sizes[i] = tensor.get_axis_size(i);
            reshape(std::begin(new_sizes), std::end(new_sizes));
        }

        /** @brief squeezes the tensor
         *
         * removes all axes of unit size
         *
         * Pre-conditions:
         * - the view must be non-empty
         * - the view's rank must be at least two
         *
         * Exception Guarantee: Strong
         */
        void squeeze() {
            CV_Assert(!empty());
            CV_Assert(rank() >= 2);
            auto itr = std::remove(std::begin(shape), std::end(shape), 1);
            shape.resize(itr - std::begin(shape));
        }

        /** @brief squeezes the tensor
         *
         * removes the specified axis if the axis length is one; otherwise, ignores the request
         *
         * Pre-conditions:
         * - the view must be non-empty
         * - the view's rank must be at least two
         *
         * Exception Guarantee: Strong
         */
        void squeeze(int axis) {
            CV_Assert(!empty());
            CV_Assert(rank() >= 2);
            axis = clamp_axis(axis, rank());
            CV_Assert(axis >= 0 && axis < rank());
            shape.erase(std::begin(shape) + axis);
        }

        /** @brief squeezes the tensor
         *
         * removes leading singleton axes until the tensor's rank is equal to the requested rank
         *
         * Pre-conditions:
         * - the tensor must be non-empty
         * - the tensor's rank must be at least two
         * - the tensor's rank must be at least the requested rank
         * - the tensor must be squeezable up to the requested rank
         *
         * Exception Guarantee: Strong
         */
        void squeeze_to(int r) {
            CV_Assert(!empty());
            CV_Assert(rank() >= r);
            CV_Assert(std::all_of(std::begin(shape), std::end(shape) - r, [](size_type x){ return x == 1; }));
            std::copy(std::end(shape) - r, std::end(shape), std::begin(shape));
            shape.resize(r);
        }

        /** @brief unsqueezes the tensor
         *
         * adds an axis of unit size before the specified axis
         *
         * Pre-conditions:
         * - the view must be non-empty
         * - the view's rank must be less than the maximum supported rank (CSL_MAX_TENSOR_RANK)
         *
         * Exception Guarantee: Strong
         */
        void unsqueeze(int axis = 0) {
            CV_Assert(!empty());
            CV_Assert(rank() < CSL_MAX_TENSOR_RANK);
            axis = clamp_axis(axis, rank());
            CV_Assert(axis >= 0 && axis < rank());
            shape.insert(std::begin(shape) + axis, 1);
        }

        /** @brief obtains a subview of the view
         *
         * Pre-conditions:
         * - the view must be non-empty
         * - the `offset` must be less than the size of the view
         * - [start, end) represents a forward range containing length of the subview axes in order
         * - the number of axis lengths provided must be less than or equal to the tensor rank
         * - the lengths provided must ensure that the number of elements does not exceed (old size - offset)
         *
         * Exception Guarantee: Strong
         */
        template <class ForwardItr>
        typename std::enable_if<cxx_utils::is_forward_iterator<ForwardItr>::value, TensorView>
        ::type subview(size_type offset, ForwardItr start, ForwardItr end) const {
            CV_Assert(start != end);
            CV_Assert(std::distance(start, end) <= rank());

            auto cur_size = size();
            CV_Assert(offset < cur_size);

            using ItrValueType = typename std::iterator_traits<ForwardItr>::value_type;

            /* sizes must be positive numbers */
            auto invalid_sizes = std::count_if(start, end, [](ItrValueType x) {
                return !(x > 0);
            });
            if (invalid_sizes) { CV_Error(Error::StsBadArg, "invalid axis size"); }

            /* the number of elements must be equal to the new size */
            auto max_size = (cur_size - offset);
            auto total = std::accumulate(start, end, 1, std::multiplies<ItrValueType>());
            if (total > max_size) {
                CV_Error(Error::StsBadArg, "axis lengths lead to OOB accesses");
            }

            TensorView temp;
            temp.shape.assign(start, end);
            temp.ptr = ptr + offset;
            return temp;
        }

        /** @brief obtains a subview of the view
         * constructs a range out of the size arguments and invokes the range-based subview method
         */
        template <class ...Sizes>
        TensorView subview(size_type offset, Sizes... new_sizes_) const {
            static_assert(sizeof...(Sizes) <= CSL_MAX_TENSOR_RANK, "required rank exceeds maximum supported rank");
            static_assert(sizeof...(Sizes) > 0, "no sizes provided");
            std::array<std::int64_t, sizeof...(Sizes)> new_sizes = { static_cast<std::int64_t>(new_sizes_)... };
            return subview(offset, std::begin(new_sizes), std::end(new_sizes));
        }
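
        /* Illustrative sketch (hypothetical values): taking a subview of a view.
         * Assuming a view of shape [2 x 6] (12 elements):
         *
         *     auto row = view.subview(6, 1, 6);   // skips the first row; viewed as [1 x 6]
         */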

        operator View<T>() const noexcept { return View<T>(ptr, size()); }

        friend void swap(TensorView& lhs, TensorView& rhs) noexcept {
            using std::swap;
            swap(lhs.ptr, rhs.ptr);
            swap(lhs.shape, rhs.shape);
        }

    private:
        cxx_utils::resizable_static_array<size_type, CSL_MAX_TENSOR_RANK> shape;
        const_pointer ptr;
    };

    /** returns true if the two TensorType objects have the same shape */
    template <class TensorType1, class TensorType2>
    bool is_shape_same(const TensorType1& x, const TensorType2& y) noexcept {
        auto rank1 = x.rank();
        auto rank2 = y.rank();

        if (rank1 != rank2)
            return false;

        for (int i = 0; i < rank1; i++)
            if (x.get_axis_size(i) != y.get_axis_size(i))
                return false;
        return true;
    }

    /** returns true if the two TensorType objects are compatible */
    template <class TensorType1, class TensorType2>
    bool is_shape_compatible(const TensorType1& x, const TensorType2& y) noexcept {
        const auto rank1 = x.rank();
        const auto rank2 = y.rank();

        /* mathematically not required but technically required here */
        if (rank1 != rank2)
            return false;

        for (int i = 0; i < rank1; i++)
            if (x.get_axis_size(i) != y.get_axis_size(i) &&
                x.get_axis_size(i) != 1 && y.get_axis_size(i) != 1)
                return false;
        return true;
    }

    /** returns the rank to which the given tensor can be squeezed */
    template <class TensorType>
    std::size_t get_effective_rank(const TensorType& x) noexcept {
        const auto rank = x.rank();
        auto effective_rank = rank;
        for (int i = 0; i < rank; i++, effective_rank--)
            if (x.get_axis_size(i) != 1)
                break;
        return effective_rank;
    }

}}}} /* namespace cv::dnn::cuda4dnn::csl */

#endif /* OPENCV_DNN_SRC_CUDA4DNN_CSL_TENSOR_HPP */