1 /* 2 * This file is part of the GROMACS molecular simulation package. 3 * 4 * Copyright (c) 2016,2017,2018,2019,2020,2021, by the GROMACS development team, led by 5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, 6 * and including many others, as listed in the AUTHORS file in the 7 * top-level source directory and at http://www.gromacs.org. 8 * 9 * GROMACS is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public License 11 * as published by the Free Software Foundation; either version 2.1 12 * of the License, or (at your option) any later version. 13 * 14 * GROMACS is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public 20 * License along with GROMACS; if not, see 21 * http://www.gnu.org/licenses, or write to the Free Software Foundation, 22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 23 * 24 * If you want to redistribute modifications to GROMACS, please 25 * consider that scientific software is very special. Version 26 * control is crucial - bugs must be traceable. We will be happy to 27 * consider code for inclusion in the official distribution, but 28 * derived work must not be called official GROMACS. Details are found 29 * in the README & COPYING files - if they are missing, get the 30 * official version at http://www.gromacs.org. 31 * 32 * To help us fund GROMACS development, we humbly ask that you cite 33 * the research papers on the package. Check out http://www.gromacs.org. 34 */ 35 /*! \file 36 * \brief 37 * Declares gmx::PaddedRVecVector 38 * 39 * \author Mark Abraham <mark.j.abraham@gmail.com> 40 * \inpublicapi 41 * \ingroup module_math 42 */ 43 #ifndef GMX_MATH_PADDEDVECTOR_H 44 #define GMX_MATH_PADDEDVECTOR_H 45 46 #include <algorithm> 47 #include <utility> 48 #include <vector> 49 50 #include "gromacs/math/arrayrefwithpadding.h" 51 #include "gromacs/math/vectypes.h" 52 #include "gromacs/utility/alignedallocator.h" 53 #include "gromacs/utility/allocator.h" 54 #include "gromacs/utility/arrayref.h" 55 56 namespace gmx 57 { 58 59 namespace detail 60 { 61 62 /*! \brief Traits classes for handling padding for types used with PaddedVector 63 * 64 * Only the base types of the SIMD module are supported for 65 * PaddedVector, because the purpose of the padding is to permit 66 * SIMD-width operations from the SIMD module. 67 * 68 * \todo Consider explicitly tying these types to the SimdTrait 69 * types. That would require depending on the SIMD module, or 70 * extracting the traits from it. This would also permit 71 * maxSimdWidthOfBaseType to be set more efficiently, e.g. as a 72 * metaprogramming max over the maximum width from different 73 * implementations. 74 */ 75 template<typename T> 76 struct PaddingTraits 77 { 78 }; 79 80 template<> 81 struct PaddingTraits<int32_t> 82 { 83 using SimdBaseType = int32_t; 84 static constexpr int maxSimdWidthOfBaseType = 16; 85 }; 86 87 template<> 88 struct PaddingTraits<float> 89 { 90 using SimdBaseType = float; 91 static constexpr int maxSimdWidthOfBaseType = GMX_FLOAT_MAX_SIMD_WIDTH; 92 }; 93 94 template<> 95 struct PaddingTraits<double> 96 { 97 using SimdBaseType = double; 98 static constexpr int maxSimdWidthOfBaseType = GMX_DOUBLE_MAX_SIMD_WIDTH; 99 }; 100 101 template<> 102 struct PaddingTraits<BasicVector<float>> 103 { 104 using SimdBaseType = float; 105 static constexpr int maxSimdWidthOfBaseType = GMX_FLOAT_MAX_SIMD_WIDTH; 106 }; 107 108 template<> 109 struct PaddingTraits<BasicVector<double>> 110 { 111 using SimdBaseType = double; 112 static constexpr int maxSimdWidthOfBaseType = GMX_DOUBLE_MAX_SIMD_WIDTH; 113 }; 114 115 /*! \brief Returns the allocation size for PaddedVector that contains 116 * \c numElements elements plus padding for SIMD operations. 117 * 118 * \param[in] numElements The number of T elements for which data will be stored. 119 * \returns The number of T elements that must be allocated 120 * (ie >= numElements). 121 */ 122 template<typename T> 123 index computePaddedSize(index numElements) 124 { 125 // We don't need padding if there is no access. 126 if (numElements == 0) 127 { 128 return 0; 129 } 130 131 // We sometimes load a whole extra element when doing 4-wide SIMD 132 // operations (which might e.g. be an RVec) so we need to pad for 133 // that. 134 index simdScatterAccessSize = numElements + 1; 135 136 // For SIMD updates based on RVec, we might load starting from 137 // the last RVec element, so that sets the minimum extent of the 138 // padding. That extent must take the initialized allocation up to 139 // the SIMD width of the base type multiplied by the width of T in 140 // that base type. But since storage_ contains RVec, we only have 141 // to tell it the number of elements, which means to round up to 142 // the next SIMD width. 143 // 144 // We don't want a dependence on the SIMD module for the actual 145 // SIMD width of the base type, so we use maximum for the base 146 // type via the traits. A little extra padding won't really hurt. 147 constexpr int maxSimdWidth = PaddingTraits<T>::maxSimdWidthOfBaseType; 148 index simdFlatAccessSize = (numElements + (maxSimdWidth - 1)) / maxSimdWidth * maxSimdWidth; 149 150 return std::max(simdScatterAccessSize, simdFlatAccessSize); 151 } 152 153 //! Helper function to insert padding elements for most T. 154 template<typename T, typename AllocatorType> 155 inline void insertPaddingElements(std::vector<T, AllocatorType>* v, index newPaddedSize) 156 { 157 // Ensure the padding region is initialized to zero. There is no 158 // way to insert a number of default-initialized elements. So we 159 // have to provide a value for those elements, which anyway suits 160 // this use case. 161 v->insert(v->end(), newPaddedSize - v->size(), 0); 162 } 163 164 //! Specialization of helper function to insert padding elements, used for BasicVector<T>. 165 template<typename T, typename AllocatorType> 166 inline void insertPaddingElements(std::vector<BasicVector<T>, AllocatorType>* v, index newPaddedSize) 167 { 168 // Ensure the padding region is initialized to zero. 169 v->insert(v->end(), newPaddedSize - v->size(), BasicVector<T>(0, 0, 0)); 170 } 171 172 } // namespace detail 173 174 /*! \brief PaddedVector is a container of elements in contiguous 175 * storage that allocates extra memory for safe SIMD-style loads for 176 * operations used in GROMACS. 177 * 178 * \tparam T the type of objects within the container 179 * \tparam Allocator the allocator used. Can be any standard-compliant 180 * allocator, such gmx::Allocator used for alignment and/or pinning. 181 * 182 * The interface resembles std::vector. However, access 183 * intended to include padded elements must be via ArrayRef objects 184 * explicitly created to view those elements. Most other aspects of 185 * this vector refer to the unpadded view, e.g. iterators, data(), 186 * size(). 187 * 188 * The underlying storage is allocated with extra elements, properly 189 * initialized, that ensure that any operations accessing the any 190 * non-additional element that operate on memory equivalent to a full 191 * SIMD lane do so on allocated memory that has been initialized, so 192 * that memory traps will not occur, and arithmetic operations will 193 * not cause e.g. floating-point exceptions so long as the values in 194 * the padded elements are properly managed. 195 * 196 * Proper initialization is tricker than it would first appear, since 197 * we intend this container to be used with scalar and class types 198 * (e.g. RVec). Resize and construction operations use "default 199 * insertion" which leads to zero initialization for the former, and 200 * calling the default constructor for the latter. BasicVector has a 201 * default constructor that leaves the elements uninitialized, which 202 * is particularly risky for elements only present as padding. Thus 203 * the implementation specifically initializes the padded elements to 204 * zero, which makes no difference to the scalar template 205 * instantiations, and makes the BasicVector ones safer to use. 206 * 207 * Because the allocator can be configured, the memory allocation can 208 * have other attributes such as SIMD alignment or being pinned to 209 * physical memory for efficient transfers. The default allocator 210 * ensures alignment, but std::allocator also works. 211 */ 212 template<typename T, typename Allocator = Allocator<T, AlignedAllocationPolicy>> 213 class PaddedVector 214 { 215 public: 216 //! Standard helper types 217 //! \{ 218 using value_type = T; 219 using allocator_type = Allocator; 220 using size_type = index; 221 using reference = value_type&; 222 using const_reference = const value_type&; 223 using storage_type = std::vector<T, allocator_type>; 224 using pointer = typename storage_type::pointer; 225 using const_pointer = typename storage_type::const_pointer; 226 using iterator = typename storage_type::iterator; 227 using const_iterator = typename storage_type::const_iterator; 228 using difference_type = typename storage_type::iterator::difference_type; 229 //! \} 230 231 PaddedVector() : storage_(), unpaddedEnd_(begin()) {} 232 /*! \brief Constructor that specifies the initial size. */ 233 explicit PaddedVector(size_type count, const allocator_type& allocator = Allocator()) : 234 storage_(count, allocator), 235 unpaddedEnd_(begin() + count) 236 { 237 // The count elements have been default inserted, and now 238 // the padding elements are added 239 resizeWithPadding(count); 240 } 241 /*! \brief Constructor that specifies the initial size and an element to copy. */ 242 explicit PaddedVector(size_type count, value_type const& v, const allocator_type& allocator = Allocator()) : 243 storage_(count, v, allocator), 244 unpaddedEnd_(begin() + count) 245 { 246 // The count elements have been default inserted, and now 247 // the padding elements are added 248 resizeWithPadding(count); 249 } 250 //! Default constructor with allocator 251 explicit PaddedVector(allocator_type const& allocator) : 252 storage_(allocator), 253 unpaddedEnd_(begin()) 254 { 255 } 256 //! Copy constructor 257 PaddedVector(PaddedVector const& o) : storage_(o.storage_), unpaddedEnd_(begin() + o.size()) {} 258 /*! \brief Move constructor 259 * 260 * Leaves \c o in a valid state (ie the destructor can be 261 * called). */ 262 PaddedVector(PaddedVector&& o) noexcept : 263 storage_(std::exchange(o.storage_, {})), 264 unpaddedEnd_(o.unpaddedEnd_) 265 { 266 } 267 /*! \brief Move constructor using \c alloc for the new vector. 268 * 269 * Note that \c alloc is another instance of the same allocator 270 * type as used for \c PaddedVector. This makes sense e.g. for 271 * stateful allocators such as HostAllocator used in 272 * PaddedHostVector. 273 * 274 * Leaves \c o in a valid state (ie. the destructor can be 275 * called). */ 276 PaddedVector(PaddedVector&& o, const Allocator& alloc) noexcept : 277 storage_(alloc), 278 unpaddedEnd_(begin()) 279 { 280 if (alloc == o.storage_.get_allocator()) 281 { 282 std::swap(storage_, o.storage_); 283 unpaddedEnd_ = o.unpaddedEnd_; 284 } 285 else 286 { 287 // If the allocator compares differently, we must 288 // reallocate and copy. 289 auto unpaddedSize = o.size(); 290 resizeWithPadding(unpaddedSize); 291 std::copy(o.begin(), o.end(), storage_.begin()); 292 unpaddedEnd_ = begin() + unpaddedSize; 293 } 294 } 295 //! Construct from an initializer list 296 PaddedVector(std::initializer_list<value_type> const& il) : 297 storage_(il), 298 unpaddedEnd_(storage_.end()) 299 { 300 // We can't choose the padding until we know the size of 301 // the normal vector, so we have to make the storage_ and 302 // then resize it. 303 resizeWithPadding(storage_.size()); 304 } 305 //! Reserve storage for the container to contain newExtent elements, plus the required padding. 306 void reserveWithPadding(const size_type newExtent) 307 { 308 auto unpaddedSize = end() - begin(); 309 /* v.reserve(13) should allocate enough memory so that 310 v.resize(13) does not reallocate. This means that the 311 new extent should be large enough for the padded 312 storage for a vector whose size is newExtent. */ 313 auto newPaddedExtent = detail::computePaddedSize<T>(newExtent); 314 storage_.reserve(newPaddedExtent); 315 unpaddedEnd_ = begin() + unpaddedSize; 316 } 317 //! Resize the container to contain newSize elements, plus the required padding. 318 void resizeWithPadding(const size_type newSize) 319 { 320 // When the contained type is e.g. a scalar, then the 321 // default initialization behaviour is to zero all 322 // elements, which is OK, but we have to make sure that it 323 // happens for the elements in the padded region when the 324 // vector is shrinking. 325 auto newPaddedSize = detail::computePaddedSize<T>(newSize); 326 // Make sure there is room for padding if we need to grow. 327 storage_.reserve(newPaddedSize); 328 // Make the unpadded size correct, with any additional 329 // elements initialized by the default constructor. It is 330 // particularly important to destruct former elements when 331 // newSize is smaller than the old size. 332 storage_.resize(newSize); 333 // Ensure the padding region is zeroed if required. 334 detail::insertPaddingElements(&storage_, newPaddedSize); 335 unpaddedEnd_ = begin() + newSize; 336 } 337 //! Return the size of the view without the padding. 338 size_type size() const { return end() - begin(); } 339 //! Return the container size including the padding. 340 size_type paddedSize() const { return storage_.size(); } 341 //! Return whether the storage is empty. 342 bool empty() const { return storage_.empty(); } 343 //! Swap two PaddedVectors 344 void swap(PaddedVector& x) 345 { 346 std::swap(storage_, x.storage_); 347 std::swap(unpaddedEnd_, x.unpaddedEnd_); 348 } 349 //! Clear the vector, ie. set size to zero and remove padding. 350 void clear() 351 { 352 storage_.clear(); 353 unpaddedEnd_ = begin(); 354 } 355 //! Iterator getters refer to a view without padding. 356 //! \{ 357 pointer data() noexcept { return storage_.data(); } 358 const_pointer data() const noexcept { return storage_.data(); } 359 360 iterator begin() { return storage_.begin(); } 361 iterator end() { return iterator(unpaddedEnd_); } 362 363 const_iterator cbegin() { return const_iterator(begin()); } 364 const_iterator cend() { return const_iterator(unpaddedEnd_); } 365 366 const_iterator begin() const { return storage_.begin(); } 367 const_iterator end() const { return const_iterator(unpaddedEnd_); } 368 369 const_iterator cbegin() const { return const_iterator(begin()); } 370 const_iterator cend() const { return const_iterator(unpaddedEnd_); } 371 //! \} 372 // TODO should these do bounds checking for the unpadded range? In debug mode? 373 //! Indexing operator. 374 reference operator[](int i) { return storage_[i]; } 375 //! Indexing operator as const. 376 const_reference operator[](int i) const { return storage_[i]; } 377 //! Returns an ArrayRef of elements that includes the padding region, e.g. for use in SIMD code. 378 ArrayRefWithPadding<T> arrayRefWithPadding() 379 { 380 return ArrayRefWithPadding<T>(data(), data() + size(), data() + paddedSize()); 381 } 382 //! Returns an ArrayRef of const elements that includes the padding region, e.g. for use in SIMD code. 383 ArrayRefWithPadding<const T> constArrayRefWithPadding() const 384 { 385 return ArrayRefWithPadding<const T>(data(), data() + size(), data() + paddedSize()); 386 } 387 /*! \brief Returns an rvec * pointer for containers of RVec, for use with legacy code. 388 * 389 * \todo Use std::is_same_v when CUDA 11 is a requirement. 390 */ 391 template<typename AlsoT = T, typename = typename std::enable_if<std::is_same<AlsoT, RVec>::value>> 392 rvec* rvec_array() 393 { 394 return as_rvec_array(data()); 395 } 396 /*! \brief Returns a const rvec * pointer for containers of RVec, for use with legacy code. 397 * 398 * \todo Use std::is_same_v when CUDA 11 is a requirement. 399 */ 400 template<typename AlsoT = T, typename = typename std::enable_if<std::is_same<AlsoT, RVec>::value>> 401 const rvec* rvec_array() const 402 { 403 return as_rvec_array(data()); 404 } 405 //! Copy assignment operator 406 PaddedVector& operator=(PaddedVector const& o) 407 { 408 if (&o != this) 409 { 410 storage_ = o.storage_; 411 unpaddedEnd_ = begin() + o.size(); 412 } 413 return *this; 414 } 415 //! Move assignment operator 416 PaddedVector& operator=(PaddedVector&& o) noexcept 417 { 418 if (&o != this) 419 { 420 auto oSize = o.size(); 421 storage_ = std::move(o.storage_); 422 unpaddedEnd_ = begin() + oSize; 423 o.unpaddedEnd_ = o.begin(); 424 } 425 return *this; 426 } 427 //! Getter for the allocator 428 allocator_type get_allocator() const { return storage_.get_allocator(); } 429 430 private: 431 storage_type storage_; 432 iterator unpaddedEnd_; 433 }; 434 435 } // namespace gmx 436 437 // TODO These are hacks to avoid littering gmx:: all over code that is 438 // almost all destined to move into the gmx namespace at some point. 439 // An alternative would be about 20 files with using statements. 440 using gmx::PaddedVector; //NOLINT(google-global-names-in-headers) 441 442 #endif 443