1 /* 2 This file is part of MADNESS. 3 4 Copyright (C) 2007,2010 Oak Ridge National Laboratory 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 20 For more information please contact: 21 22 Robert J. Harrison 23 Oak Ridge National Laboratory 24 One Bethel Valley Road 25 P.O. Box 2008, MS-6367 26 27 email: harrisonrj@ornl.gov 28 tel: 865-241-3937 29 fax: 865-572-0680 30 */ 31 32 #ifndef MADNESS_TENSOR_TENSOR_H__INCLUDED 33 #define MADNESS_TENSOR_TENSOR_H__INCLUDED 34 35 #include <madness/madness_config.h> 36 #include <madness/misc/ran.h> 37 #include <madness/world/posixmem.h> 38 39 #include <memory> 40 #include <complex> 41 #include <vector> 42 #include <cmath> 43 #include <cstdlib> 44 #include <cstddef> 45 46 #include <madness/world/archive.h> 47 // #include <madness/world/print.h> 48 // 49 // typedef std::complex<float> float_complex; 50 // typedef std::complex<double> double_complex; 51 // 52 // // These probably have to be included in this order 53 // #include <madness/tensor/tensor_macros.h> 54 // #include <madness/tensor/type_data.h> 55 // #include <madness/tensor/slice.h> 56 // #include <madness/tensor/vector_factory.h> 57 #include <madness/tensor/basetensor.h> 58 #include <madness/tensor/aligned.h> 59 #include <madness/tensor/mxm.h> 60 #include <madness/tensor/tensorexcept.h> 61 #include 
<madness/tensor/tensoriter.h> 62 63 #ifdef USE_GENTENSOR 64 #define HAVE_GENTENSOR 1 65 #else 66 #define HAVE_GENTENSOR 0 67 #endif 68 69 70 /*! 71 \file tensor.h 72 \brief Defines and implements most of Tensor 73 \ingroup tensor 74 \addtogroup tensor 75 76 \par Introduction 77 78 A tensor is a multi-dimensional array and does not incorporate any concepts 79 of covariance and contravariance. 80 81 When a new tensor is created, the underlying data is also allocated. 82 E.g., 83 \code 84 Tensor<double> a(3,4,5) 85 \endcode 86 creates a new 3-dimensional tensor and allocates a contiguous 87 block of 60 doubles which are initialized to zero. The dimensions 88 (numbered from the left starting at 0) are in C or row-major 89 order. Thus, for the tensor \c a , the stride between successive 90 elements of the right-most dimension is 1. For the middle 91 dimension it is 5. For the left-most dimension it is 20. Thus, 92 the loops 93 \code 94 for (i=0; i<3; ++i) 95 for (j=0; j<4; ++j) 96 for (k=0; k<5; ++k) 97 a(i,j,k) = ... 98 \endcode 99 will go sequentially (and thus efficiently) through memory. 100 If the dimensions have been reordered (e.g., with \c swapdim() 101 or \c map() ), or if the tensor is actually a slice of another 102 tensor, then the layout in memory may be more complex and 103 may not reflect a contiguous block of memory. 104 105 Multiple tensors may be used to provide multiple identical or 106 distinct views of the same data. E.g., in the following 107 \code 108 Tensor<double> a(2,3); // A new tensor initialized to zero 109 Tensor<double> b = a; 110 \endcode 111 \c a and \c b provide identical views of the same data, thus 112 \code 113 b(1,2) = 99; 114 cout << a(1,2) << endl; // Outputs 99 115 cout << b(1,2) << endl; // Outputs 99 116 \endcode 117 118 \par Shallow copy and assignment 119 120 It is important to appreciate that the views and the data are 121 quite independent. 
In particular, the default copy constructor
and assignment operations only copy the tensor (the view) and not
the data --- <em> i.e., the copy constructor and assignment operations
only take shallow copies</em>.  This is for both consistency and
efficiency.  Thus, assigning one tensor to another generates another
view of the same data, replacing any previous view and not moving
or copying any of the data.
E.g.,
\code
Tensor<double> a(2,3);    // A new tensor initialized to zero
Tensor<double> c(3,3,3);  // Another new tensor
Tensor<double> b = a;     // b is a view of the same data as a
a = c;  // a is now a view of c's data
b = c;  // b is now also a view of c's data and the
        // data allocated originally for a is freed
\endcode
The above example also illustrates how reference counting is used
to keep track of the underlying data.  Once there are no views
of the data, it is automatically freed.

There are only two ways to actually copy the underlying data.  A
new, complete, and contiguous copy of a tensor and its data may be
generated with the \c copy() function.  Or, to copy data from one tensor
into the data viewed by another tensor, you must use a Slice.

\par Indexing

One dimensional tensors (i.e., vectors) may be indexed using
either square brackets (e.g., \c v[i] ) or round brackets (e.g.,
\c v(i) ).  All higher-dimensional tensors must use only round
brackets (e.g., \c t(i,j,k) ).  This is due to C++'s restriction
that the indexing operator (\c [] ) can only have one argument.
The indexing operation should generate efficient code.

For the sake of efficiency, no bounds checking is performed by
default by most single element indexing operations.  Checking can
be enabled at compile time by defining \c -DTENSOR_BOUNDS_CHECKING for
application files including \c tensor.h.
The MADNESS configure script
has the option \c --enable-tensor-bound-checking to define the macro
in \c madness_config.h .  The general indexing
operation that takes a \c std::vector<long> index and all slicing
operations always perform bounds checking.  To make indexing with
checking a bit easier, a factory function has been provided for
vectors ... but note you need to explicitly use longs as the
index.
\code
Tensor<long> a(7,7,7);
a(3,4,5) += 1;  // OK  ... adds 1 to element (3,4,5)
a(3,4,9) += 1;  // BAD ... undetected out-of-bounds access
a(vector_factory(3L,4L,9L)) += 1;  // OK ... out-of-bounds access will
                                   // be detected at runtime.
\endcode

\par Slicing

Slices generate sub-tensors --- i.e., views of patches of the
data.  E.g., to refer to all but the first and last elements in
each dimension of a matrix use
\code
a(Slice(1,-2),Slice(1,-2))
\endcode
Or to view odd elements in each dimension
\code
a(Slice(0,-1,2),Slice(0,-1,2))
\endcode
A slice or patch of a
tensor behaves exactly like a tensor \em except for assignment.
When a slice is assigned to, the data is copied with the
requirement that the source and destinations agree in size and
shape (i.e., they conform).  Thus, to copy all of the data
from a to b,
\code
Tensor<double> a(3,4), b(3,4);
a = 1;                              // Set all elements of a to 1
b = 2;                              // Set all elements of b to 2
a(Slice(0,-1,1),Slice(0,-1,1)) = b; // Copy all data from b to a
a(_,_) = b(_,_);                    // Copy all data from b to a
a(___) = b(___);                    // Copy all data from b to a
a(Slice(1,2),Slice(1,2)) = b;       // Error, do not conform
\endcode
Special slice values \c _ , \c _reverse, and \c ___ have
been defined to refer to all elements in a dimension, all
elements in a dimension but reversed, and all elements in all
dimensions, respectively.
    \par Iteration and algorithms

    See tensor_macros.h for documentation on the easiest mechanisms for iteration over
    elements of tensors and tips for optimization. See \c TensorIterator for
    the most general form of iteration.
*/


#ifndef HAVE_STD_ABS_LONG
#ifndef HAVE_STD_LABS
// Fallback for standard libraries that provide neither std::abs(long)
// nor std::labs.
namespace std {
    static long abs(long a) {
        return a>=0 ? a : -a;
    }
}
#else
// Fallback for standard libraries that lack std::abs(long) but do
// provide std::labs.
namespace std {
    static long abs(long a) {
        return std::labs(a);
    }
}
#endif
#endif


namespace madness {
#define IS_ODD(n) ((n)&0x1)
#define IS_UNALIGNED(p) (((unsigned long)(p))&0x7)


    /// For real types return value, for complex return conjugate
    template <typename Q, bool iscomplex>
    struct conditional_conj_struct {
        static Q op(const Q& coeff) {
            return coeff;
        }
    };

    /// For real types return value, for complex return conjugate

    /// Partial specialization selected when TensorTypeData<Q>::iscomplex is true.
    template <typename Q>
    struct conditional_conj_struct<Q,true> {
        static Q op(const Q& coeff) {
            return conj(coeff);
        }
    };

    /// For real types return value, for complex return conjugate
    template <typename Q>
    Q conditional_conj(const Q& coeff) {
        return conditional_conj_struct<Q,TensorTypeData<Q>::iscomplex>::op(coeff);
    }

    namespace detail {
        // Squared magnitude: t*t for real types ...
        template <typename T> T mynorm(T t) {
            return t*t;
        }

        // ... and |t|^2 for complex types.
        template <typename T> T mynorm(std::complex<T> t) {
            return std::norm(t);
        }
    }

    template <class T> class SliceTensor;


    /// low rank representations of tensors (see gentensor.h)
    enum TensorType {TT_NONE, TT_FULL, TT_2D, TT_TENSORTRAIN};

    /// Stream a human-readable name for a TensorType
    static
    inline
    std::ostream& operator << (std::ostream& s, const TensorType& tt) {
        std::string str="confused tensor type";
        if (tt==TT_FULL) str="full rank tensor";
        if (tt==TT_2D) str="low rank tensor 2-way";
        if (tt==TT_TENSORTRAIN) str="tensor train";
        if (tt==TT_NONE) str="no tensor type specified";
        s << str.c_str();
        return s;
    }

//#define TENSOR_USE_SHARED_ALIGNED_ARRAY
#ifdef TENSOR_USE_SHARED_ALIGNED_ARRAY
#define TENSOR_SHARED_PTR detail::SharedAlignedArray
    // this code has been tested and seems to work correctly on all
    // tests and moldft ... however initial testing indicated a hard
    // to measure improvement in thread scaling (from 20 to 60 threads
    // on cn-mem) and no change in the modest thread count (20)
    // execution time with tbballoc. hence we are not presently using
    // it but will test more in the future with different allocators
    namespace detail {
        // Minimal ref-counted array with data+counter in one alloc
        template <typename T> class SharedAlignedArray {
            T* volatile p;
            AtomicInt* cnt;      // ref count stored at the tail of the same allocation
            void dec() {if (p && ((*cnt)-- == 1)) {free(p); p = 0;}}
            void inc() {if (p) (*cnt)++;}
        public:
            SharedAlignedArray() : p(0), cnt(0) {}
            T* allocate(std::size_t size, unsigned int alignment) {
                std::size_t offset = (size*sizeof(T)-1)/sizeof(AtomicInt) + 1; // Where the counter will be
                std::size_t nbyte = (offset+1)*sizeof(AtomicInt);
                if (posix_memalign((void **) &p, alignment, nbyte)) throw 1;
                cnt = (AtomicInt*)(p) + offset;
                *cnt = 1;
                return p;
            }
            SharedAlignedArray<T>& operator=(const SharedAlignedArray<T>& other) {
                if (this != &other) {dec(); p = other.p; cnt = other.cnt; inc();}
                return *this;
            }
            void reset() {dec(); p = 0;}
            ~SharedAlignedArray() {dec();}
        };
    }
#else
#define TENSOR_SHARED_PTR std::shared_ptr
#endif

    /// A tensor is a multidimension array

    /// \ingroup tensor
    template <class T> class Tensor : public BaseTensor {
        template <class U> friend class SliceTensor;

    protected:
        T* MADNESS_RESTRICT _p;        // Pointer to the first element viewed by this tensor
        TENSOR_SHARED_PTR <T> _shptr;  // Shares ownership of the underlying storage

        // Allocate (and optionally zero) aligned storage for an nd-dimensional
        // tensor with extents d[0..nd-1].  nd<0 restores the default-constructed
        // (empty) state; a zero total size leaves the tensor with no data.
        void allocate(long nd, const long d[], bool dozero) {
            _id = TensorTypeData<T>::id;
            if (nd < 0) {
                // Negative ndim means "make me empty" (used by deallocate-like paths)
                _p = 0;
                _shptr.reset();
                _size = 0;
                _ndim = -1;
                return;
            }

            TENSOR_ASSERT(nd>0 && nd <= TENSOR_MAXDIM,"invalid ndim in new tensor", nd, 0);
            // sanity check ... 2GB in doubles
            for (int i=0; i<nd; ++i) {
                TENSOR_ASSERT(d[i]>=0 && d[i]<268435456, "invalid dimension size in new tensor",d[i],0);
            }
            set_dims_and_size(nd, d);
            if (_size) {
                TENSOR_ASSERT(_size>=0 && _size<268435456, "invalid size in new tensor",_size,0);
                try {
#if HAVE_IBMBGP
#define TENSOR_ALIGNMENT 16
#elif HAVE_IBMBGQ
#define TENSOR_ALIGNMENT 32
#elif MADNESS_HAVE_AVX2
/* 32B alignment is best for performance according to
 * http://www.nas.nasa.gov/hecc/support/kb/haswell-processors_492.html */
#define TENSOR_ALIGNMENT 32
#elif MADNESS_HAVE_AVX512
/* One can infer from the AVX2 case that 64B alignment helps with 512b SIMD. */
#define TENSOR_ALIGNMENT 64
#else
// Typical cache line size
#define TENSOR_ALIGNMENT 64
#endif

#ifdef TENSOR_USE_SHARED_ALIGNED_ARRAY
                    _p = _shptr.allocate(_size, TENSOR_ALIGNMENT);
#elif defined WORLD_GATHER_MEM_STATS
                    // new[] so the instrumented global operator new sees the allocation
                    _p = new T[_size];
                    _shptr = std::shared_ptr<T>(_p);
#else
                    if (posix_memalign((void **) &_p, TENSOR_ALIGNMENT, sizeof(T)*_size)) throw 1;
                    _shptr.reset(_p, &free);
#endif
                }
                catch (...) {
                    // NOTE(review): diagnostic only — d[] has nd valid entries,
                    // so entries printed beyond nd-1 are indeterminate.
                    std::printf("new failed nd=%ld type=%ld size=%ld\n", nd, id(), _size);
                    std::printf(" %ld %ld %ld %ld %ld %ld\n",
                                d[0], d[1], d[2], d[3], d[4], d[5]);
                    TENSOR_EXCEPTION("new failed",_size,this);
                }
                //std::printf("allocated %p [%ld] %ld\n", _p, size, p.use_count());
                if (dozero) {
                    //T zero = 0; for (long i=0; i<_size; ++i) _p[i] = zero;
                    // or
#ifdef HAVE_MEMSET
                    memset((void *) _p, 0, _size*sizeof(T));
#else
                    aligned_zero(_size, _p);
#endif
                }
            }
            else {
                _p = 0;
                _shptr.reset();
            }
        }

        // Free memory and restore default constructor state
        void deallocate() {
            _p = 0;
            _shptr.reset();
            _size = 0;
            _ndim = -1;
        }

    public:
        /// C++ typename of this tensor.
        typedef T type;

        /// C++ typename of the real type associated with a complex type.
        typedef typename TensorTypeData<T>::scalar_type scalar_type;

        /// C++ typename of the floating point type associated with scalar real type
        typedef typename TensorTypeData<T>::float_scalar_type float_scalar_type;

        /// Default constructor does not allocate any data and sets ndim=-1, size=0, _p=0, and id.
421 Tensor() : _p(0) { 422 _id = TensorTypeData<T>::id; 423 } 424 425 /// Copy constructor is shallow (same as assignment) 426 427 /// \em Caveat \em emptor: The shallow copy constructor has many virtues but 428 /// enables you to violate constness with simple code such as 429 /// \code 430 /// const Tensor<double> a(5); 431 /// Tensor<double> b(a); 432 /// b[1] = 3; // a[1] is now also 3 433 /// \endcode 434 Tensor(const Tensor<T>& t) { 435 _id = TensorTypeData<T>::id; 436 *this = t; 437 } 438 439 /// Assignment is shallow (same as copy constructor) 440 441 /// \em Caveat \em emptor: The shallow assignment has many virtues but 442 /// enables you to violate constness with simple code such as 443 /// \code 444 /// const Tensor<double> a(5); 445 /// Tensor<double> b; 446 /// b = a; 447 /// b[1] = 3; // a[1] is now also 3 448 /// \endcode 449 Tensor<T>& operator=(const Tensor<T>& t) { 450 if (this != &t) { 451 _p = t._p; 452 _shptr = t._shptr; 453 _size = t._size; 454 _ndim = t._ndim; 455 for (int i=0; i<TENSOR_MAXDIM; ++i) { 456 _dim[i] = t._dim[i]; 457 _stride[i] = t._stride[i]; 458 } 459 } 460 return *this; 461 } 462 463 464 /// Type conversion makes a deep copy 465 template <class Q> operator Tensor<Q>() const { // type conv => deep copy 466 Tensor<Q> result = Tensor<Q>(this->_ndim,this->_dim,false); 467 BINARY_OPTIMIZED_ITERATOR(Q, result, const T, (*this), *_p0 = (Q)(*_p1)); 468 return result; 469 } 470 471 472 /// Create and zero new 1-d tensor 473 474 /// @param[in] d0 Size of dimension 0 475 explicit Tensor(long d0) : _p(0) { 476 _dim[0] = d0; 477 allocate(1, _dim, true); 478 } 479 480 /// Create and zero new 2-d tensor 481 482 /// @param[in] d0 Size of dimension 0 483 /// @param[in] d1 Size of dimension 1 484 explicit Tensor(long d0, long d1) : _p(0) { 485 _dim[0] = d0; _dim[1] = d1; 486 allocate(2, _dim, true); 487 } 488 489 /// Create and zero new 3-d tensor 490 491 /// @param[in] d0 Size of dimension 0 492 /// @param[in] d1 Size of dimension 1 493 /// 
@param[in] d2 Size of dimension 2 494 explicit Tensor(long d0, long d1, long d2) : _p(0) { 495 _dim[0] = d0; _dim[1] = d1; _dim[2] = d2; 496 allocate(3, _dim, true); 497 } 498 499 /// Create and zero new 4-d tensor 500 501 /// @param[in] d0 Size of dimension 0 502 /// @param[in] d1 Size of dimension 1 503 /// @param[in] d2 Size of dimension 2 504 /// @param[in] d3 Size of dimension 3 505 explicit Tensor(long d0, long d1, long d2, long d3) : _p(0) { 506 _dim[0] = d0; _dim[1] = d1; _dim[2] = d2; _dim[3] = d3; 507 allocate(4, _dim, true); 508 } 509 510 /// Create and zero new 5-d tensor 511 512 /// @param[in] d0 Size of dimension 0 513 /// @param[in] d1 Size of dimension 1 514 /// @param[in] d2 Size of dimension 2 515 /// @param[in] d3 Size of dimension 3 516 /// @param[in] d4 Size of dimension 4 517 explicit Tensor(long d0, long d1, long d2, long d3, long d4) : _p(0) { 518 _dim[0] = d0; _dim[1] = d1; _dim[2] = d2; _dim[3] = d3; _dim[4] = d4; 519 allocate(5, _dim, true); 520 } 521 522 /// Create and zero new 6-d tensor 523 524 /// @param[in] d0 Size of dimension 0 525 /// @param[in] d1 Size of dimension 1 526 /// @param[in] d2 Size of dimension 2 527 /// @param[in] d3 Size of dimension 3 528 /// @param[in] d4 Size of dimension 4 529 /// @param[in] d5 Size of dimension 5 530 explicit Tensor(long d0, long d1, long d2, long d3, long d4, long d5) { 531 _dim[0] = d0; _dim[1] = d1; _dim[2] = d2; _dim[3] = d3; _dim[4] = d4; _dim[5] = d5; 532 allocate(6, _dim, true); 533 } 534 535 /// Create and optionally zero new n-d tensor. This is the most general constructor. 536 537 /// @param[in] d Vector containing size of each dimension, number of dimensions inferred from vcector size. 538 /// @param[in] dozero If true (default) the tensor is initialized to zero 539 explicit Tensor(const std::vector<long>& d, bool dozero=true) : _p(0) { 540 allocate(d.size(), d.size() ? &(d[0]) : 0, dozero); 541 } 542 543 /// Politically incorrect general constructor. 
544 545 /// @param[in] nd Number of dimensions 546 /// @param[in] d Size of each dimension 547 /// @param[in] dozero If true (default) the tensor is initialized to zero 548 explicit Tensor(long nd, const long d[], bool dozero=true) : _p(0) { 549 allocate(nd,d,dozero); 550 } 551 552 /// Inplace fill tensor with scalar 553 554 /// @param[in] x Value used to fill tensor via assigment 555 /// @return %Reference to this tensor 556 Tensor<T>& operator=(T x) { 557 UNARY_OPTIMIZED_ITERATOR(T,(*this),*_p0 = x); 558 return *this; 559 } 560 561 /// Inplace fill with a scalar (legacy name) 562 563 /// @param[in] x Value used to fill tensor via assigment 564 /// @return %Reference to this tensor 565 Tensor<T>& fill(T x) { 566 *this = x; 567 return *this; 568 } 569 570 /// Inplace addition of two tensors 571 572 /// @param[in] t Conforming tensor to be added in-place to this tensor 573 /// @return %Reference to this tensor 574 template <typename Q> 575 Tensor<T>& operator+=(const Tensor<Q>& t) { 576 BINARY_OPTIMIZED_ITERATOR(T, (*this), const T, t, *_p0 += *_p1); 577 return *this; 578 } 579 580 /// Inplace subtraction of two tensors 581 582 /// @param[in] t Conforming tensor to be subtracted in-place from this tensor 583 /// @return %Reference to this tensor 584 template <typename Q> 585 Tensor<T>& operator-=(const Tensor<Q>& t) { 586 BINARY_OPTIMIZED_ITERATOR(T, (*this), const T, t, *_p0 -= *_p1); 587 return *this; 588 } 589 590 /// Addition of two tensors to produce a new tensor 591 592 /// @param[in] t Conforming tensor to be added out-of-place to this tensor 593 /// @return New tensor 594 template <typename Q> 595 Tensor< TENSOR_RESULT_TYPE(T,Q) > operator+(const Tensor<Q>& t) const { 596 typedef TENSOR_RESULT_TYPE(T,Q) resultT; 597 Tensor<resultT> result(_ndim,_dim,false); 598 TERNARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), const Q, t, *_p0 = *_p1 + *_p2); 599 return result; 600 } 601 602 /// Subtraction of two tensors to produce a new tensor 603 604 /// 
@param[in] t Conforming tensor to be subtracted out-of-place from this tensor 605 /// @return New tensor 606 template <typename Q> 607 Tensor< TENSOR_RESULT_TYPE(T,Q) > operator-(const Tensor<Q>& t) const { 608 typedef TENSOR_RESULT_TYPE(T,Q) resultT; 609 Tensor<resultT> result(_ndim,_dim,false); 610 TERNARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), const Q, t, *_p0 = *_p1 - *_p2); 611 return result; 612 } 613 614 /// Multiplication of tensor by a scalar of a supported type to produce a new tensor 615 616 /// @param[in] x Scalar value 617 /// @return New tensor 618 template <typename Q> 619 typename IsSupported<TensorTypeData<Q>, Tensor<TENSOR_RESULT_TYPE(T,Q)> >::type 620 operator*(const Q& x) const { 621 typedef TENSOR_RESULT_TYPE(T,Q) resultT; 622 Tensor<resultT> result(_ndim,_dim,false); 623 BINARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), *_p0 = *_p1 * x); 624 return result; 625 } 626 627 /// Divide tensor by a scalar of a supported type to produce a new tensor 628 629 /// @param[in] x Scalar value 630 /// @return New tensor 631 template <typename Q> 632 typename IsSupported<TensorTypeData<Q>, Tensor<TENSOR_RESULT_TYPE(T,Q)> >::type 633 operator/(const Q& x) const { 634 typedef TENSOR_RESULT_TYPE(T,Q) resultT; 635 Tensor<resultT> result(_ndim,_dim); 636 BINARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), *_p0 = *_p1 / x); 637 return result; 638 } 639 640 /// Add a scalar of the same type to all elements of a tensor producing a new tensor 641 642 /// @param[in] x Scalar value 643 /// @return New tensor 644 template <typename Q> 645 typename IsSupported<TensorTypeData<Q>, Tensor<TENSOR_RESULT_TYPE(T,Q)> >::type 646 operator+(const Q& x) const { 647 typedef TENSOR_RESULT_TYPE(T,Q) resultT; 648 Tensor<resultT> result(_ndim,_dim); 649 BINARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), *_p0 = *_p1 + x); 650 return result; 651 } 652 653 /// Subtract a scalar of the same type from all elements producing a new tensor 654 655 
        /// @param[in] x Scalar value
        /// @return New tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>, Tensor<TENSOR_RESULT_TYPE(T,Q)> >::type
        operator-(const Q& x) const {
            // Implemented via operator+ with the negated scalar
            return (*this) + (-x);
        }

        /// Unary negation producing a new tensor

        /// @return New tensor
        Tensor<T> operator-() const {
            Tensor<T> result = Tensor<T>(_ndim,_dim,false);
            BINARY_OPTIMIZED_ITERATOR(T, result, const T, (*this), *(_p0) = - (*_p1));
            return result;
        }

        /// Inplace multiplication by scalar of supported type

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>,Tensor<T>&>::type
        operator*=(const Q& x) {
            UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 *= x);
            return *this;
        }

        /// Inplace multiplication by scalar of supported type (legacy name)

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>,Tensor<T>&>::type
        scale(Q x) {
            return (*this)*=x;
        }

        /// Inplace increment by scalar of supported type

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>,Tensor<T>&>::type
        operator+=(const Q& x) {
            UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 += x);
            return *this;
        }

        /// Inplace decrement by scalar of supported type

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>,Tensor<T>&>::type
        operator-=(const Q& x) {
            UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 -= x);
            return *this;
        }


        /// Inplace complex conjugate

        /// For real element types this is a no-op (conditional_conj returns
        /// the value unchanged); for complex types each element is conjugated.
        /// @return %Reference to this tensor
        Tensor<T>& conj() {
            UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 = conditional_conj(*_p0));
            return *this;
        }

        /// Inplace fill with random values ( \c [0,1] for floats, \c [0,MAXSIZE] for integers)

        /// @return %Reference to this tensor
        Tensor<T>& fillrandom() {
            if (iscontiguous()) {
                // Contiguous data can use the (presumably vectorized) bulk generator
                madness::RandomVector<T>(size(), ptr());
            }
            else {
                UNARY_OPTIMIZED_ITERATOR(T,(*this), *_p0 = madness::RandomValue<T>());
            }
            return *this;
        }

        /// Inplace fill with the index of each element

        /// Each element is assigned its logical index according to this loop structure
        /// \code
        /// Tensor<float> t(5,6,7,...)
        /// long index=0;
        /// for (long i=0; i<_dim[0]; ++i)
        ///    for (long j=0; j<_dim[1]; ++j)
        ///       for (long k=0; k<_dim[2]; ++k)
        ///          ...
        ///          tensor(i,j,k,...) = index++
        /// \endcode
        ///
        /// @return %Reference to this tensor
        Tensor<T>& fillindex() {
            long count = 0;
            // Must use the unoptimized iterator so elements are visited in
            // strict logical (row-major) order
            UNARY_UNOPTIMIZED_ITERATOR(T,(*this), *_p0 = count++); // Fusedim would be OK
            return *this;
        }

        /// Inplace set elements of \c *this less than \c x in absolute magnitude to zero.

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        Tensor<T>& screen(double x) {
            T zero = 0;
            UNARY_OPTIMIZED_ITERATOR(T,(*this), if (std::abs(*_p0)<x) *_p0=zero);
            return *this;
        }


        /// Return true if bounds checking was enabled at compile time

        /// @return True if bounds checking was enabled at compile time
        static bool bounds_checking() {
#ifdef TENSOR_BOUNDS_CHECKING
            return true;
#else
            return false;
#endif
        }

        /// 1-d indexing operation using \c [] \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @return %Reference to element
        T& operator[](long i) {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"1d bounds check failed dim=0",i,this);
#endif
            return _p[i*_stride[0]];
        }

        /// 1-d indexing operation using \c [] \em without bounds checking.
791 792 /// @param[in] i index for dimension 0 793 /// @return %Reference to element 794 const T& operator[](long i) const { 795 #ifdef TENSOR_BOUNDS_CHECKING 796 TENSOR_ASSERT(i>=0 && i<_dim[0],"1d bounds check failed dim=0",i,this); 797 #endif 798 return _p[i*_stride[0]]; 799 } 800 801 /// 1-d indexing operation \em without bounds checking. 802 803 /// @param[in] i index for dimension 0 804 /// @return %Reference to element 805 T& operator()(long i) { 806 #ifdef TENSOR_BOUNDS_CHECKING 807 TENSOR_ASSERT(i>=0 && i<_dim[0],"1d bounds check failed dim=0",i,this); 808 #endif 809 return _p[i*_stride[0]]; 810 } 811 812 /// 1-d indexing operation \em without bounds checking. 813 814 /// @param[in] i index for dimension 0 815 /// @return %Reference to element 816 const T& operator()(long i) const { 817 #ifdef TENSOR_BOUNDS_CHECKING 818 TENSOR_ASSERT(i>=0 && i<_dim[0],"1d bounds check failed dim=0",i,this); 819 #endif 820 return _p[i*_stride[0]]; 821 } 822 823 /// 2-d indexing operation \em without bounds checking. 824 825 /// @param[in] i index for dimension 0 826 /// @param[in] j index for dimension 1 827 /// @return %Reference to element 828 T& operator()(long i, long j) { 829 #ifdef TENSOR_BOUNDS_CHECKING 830 TENSOR_ASSERT(i>=0 && i<_dim[0],"2d bounds check failed dim=0",i,this); 831 TENSOR_ASSERT(j>=0 && j<_dim[1],"2d bounds check failed dim=1",j,this); 832 #endif 833 return _p[i*_stride[0]+j*_stride[1]]; 834 } 835 836 /// 2-d indexing operation \em without bounds checking. 837 838 /// @param[in] i index for dimension 0 839 /// @param[in] j index for dimension 1 840 /// @return %Reference to element 841 const T& operator()(long i, long j) const { 842 #ifdef TENSOR_BOUNDS_CHECKING 843 TENSOR_ASSERT(i>=0 && i<_dim[0],"2d bounds check failed dim=0",i,this); 844 TENSOR_ASSERT(j>=0 && j<_dim[1],"2d bounds check failed dim=1",j,this); 845 #endif 846 return _p[i*_stride[0]+j*_stride[1]]; 847 } 848 849 /// 3-d indexing operation \em without bounds checking. 
850 851 /// @param[in] i index for dimension 0 852 /// @param[in] j index for dimension 1 853 /// @param[in] k index for dimension 2 854 /// @return %Reference to element 855 T& operator()(long i, long j, long k) { 856 #ifdef TENSOR_BOUNDS_CHECKING 857 TENSOR_ASSERT(i>=0 && i<_dim[0],"3d bounds check failed dim=0",i,this); 858 TENSOR_ASSERT(j>=0 && j<_dim[1],"3d bounds check failed dim=1",j,this); 859 TENSOR_ASSERT(k>=0 && k<_dim[2],"3d bounds check failed dim=2",k,this); 860 #endif 861 return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]]; 862 } 863 864 /// 3-d indexing operation \em without bounds checking. 865 866 /// @param[in] i index for dimension 0 867 /// @param[in] j index for dimension 1 868 /// @param[in] k index for dimension 2 869 /// @return %Reference to element 870 const T& operator()(long i, long j, long k) const { 871 #ifdef TENSOR_BOUNDS_CHECKING 872 TENSOR_ASSERT(i>=0 && i<_dim[0],"3d bounds check failed dim=0",i,this); 873 TENSOR_ASSERT(j>=0 && j<_dim[1],"3d bounds check failed dim=1",j,this); 874 TENSOR_ASSERT(k>=0 && k<_dim[2],"3d bounds check failed dim=2",k,this); 875 #endif 876 return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]]; 877 } 878 879 /// 4-d indexing operation \em without bounds checking. 880 881 /// @param[in] i index for dimension 0 882 /// @param[in] j index for dimension 1 883 /// @param[in] k index for dimension 2 884 /// @param[in] l index for dimension 3 885 /// @return %Reference to element 886 T& operator()(long i, long j, long k, long l) { 887 #ifdef TENSOR_BOUNDS_CHECKING 888 TENSOR_ASSERT(i>=0 && i<_dim[0],"4d bounds check failed dim=0",i,this); 889 TENSOR_ASSERT(j>=0 && j<_dim[1],"4d bounds check failed dim=1",j,this); 890 TENSOR_ASSERT(k>=0 && k<_dim[2],"4d bounds check failed dim=2",k,this); 891 TENSOR_ASSERT(l>=0 && l<_dim[3],"4d bounds check failed dim=3",l,this); 892 #endif 893 return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+ 894 l*_stride[3]]; 895 } 896 897 /// 4-d indexing operation \em without bounds checking. 
898 899 /// @param[in] i index for dimension 0 900 /// @param[in] j index for dimension 1 901 /// @param[in] k index for dimension 2 902 /// @param[in] l index for dimension 3 903 /// @return %Reference to element 904 const T& operator()(long i, long j, long k, long l) const { 905 #ifdef TENSOR_BOUNDS_CHECKING 906 TENSOR_ASSERT(i>=0 && i<_dim[0],"4d bounds check failed dim=0",i,this); 907 TENSOR_ASSERT(j>=0 && j<_dim[1],"4d bounds check failed dim=1",j,this); 908 TENSOR_ASSERT(k>=0 && k<_dim[2],"4d bounds check failed dim=2",k,this); 909 TENSOR_ASSERT(l>=0 && l<_dim[3],"4d bounds check failed dim=3",l,this); 910 #endif 911 return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+ 912 l*_stride[3]]; 913 } 914 915 /// 5-d indexing operation \em without bounds checking. 916 917 /// @param[in] i index for dimension 0 918 /// @param[in] j index for dimension 1 919 /// @param[in] k index for dimension 2 920 /// @param[in] l index for dimension 3 921 /// @param[in] m index for dimension 4 922 /// @return %Reference to element 923 T& operator()(long i, long j, long k, long l, long m) { 924 #ifdef TENSOR_BOUNDS_CHECKING 925 TENSOR_ASSERT(i>=0 && i<_dim[0],"5d bounds check failed dim=0",i,this); 926 TENSOR_ASSERT(j>=0 && j<_dim[1],"5d bounds check failed dim=1",j,this); 927 TENSOR_ASSERT(k>=0 && k<_dim[2],"5d bounds check failed dim=2",k,this); 928 TENSOR_ASSERT(l>=0 && l<_dim[3],"5d bounds check failed dim=3",l,this); 929 TENSOR_ASSERT(m>=0 && m<_dim[4],"5d bounds check failed dim=4",m,this); 930 #endif 931 return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+ 932 l*_stride[3]+m*_stride[4]]; 933 } 934 935 /// 5-d indexing operation \em without bounds checking. 
936 937 /// @param[in] i index for dimension 0 938 /// @param[in] j index for dimension 1 939 /// @param[in] k index for dimension 2 940 /// @param[in] l index for dimension 3 941 /// @param[in] m index for dimension 4 942 /// @return %Reference to element 943 const T& operator()(long i, long j, long k, long l, long m) const { 944 #ifdef TENSOR_BOUNDS_CHECKING 945 TENSOR_ASSERT(i>=0 && i<_dim[0],"5d bounds check failed dim=0",i,this); 946 TENSOR_ASSERT(j>=0 && j<_dim[1],"5d bounds check failed dim=1",j,this); 947 TENSOR_ASSERT(k>=0 && k<_dim[2],"5d bounds check failed dim=2",k,this); 948 TENSOR_ASSERT(l>=0 && l<_dim[3],"5d bounds check failed dim=3",l,this); 949 TENSOR_ASSERT(m>=0 && m<_dim[4],"5d bounds check failed dim=4",m,this); 950 #endif 951 return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+ 952 l*_stride[3]+m*_stride[4]]; 953 } 954 955 /// 6-d indexing operation \em without bounds checking. 956 957 /// @param[in] i index for dimension 0 958 /// @param[in] j index for dimension 1 959 /// @param[in] k index for dimension 2 960 /// @param[in] l index for dimension 3 961 /// @param[in] m index for dimension 4 962 /// @param[in] n index for dimension 5 963 /// @return %Reference to element 964 T& operator()(long i, long j, long k, long l, long m, long n) { 965 #ifdef TENSOR_BOUNDS_CHECKING 966 TENSOR_ASSERT(i>=0 && i<_dim[0],"6d bounds check failed dim=0",i,this); 967 TENSOR_ASSERT(j>=0 && j<_dim[1],"6d bounds check failed dim=1",j,this); 968 TENSOR_ASSERT(k>=0 && k<_dim[2],"6d bounds check failed dim=2",k,this); 969 TENSOR_ASSERT(l>=0 && l<_dim[3],"6d bounds check failed dim=3",l,this); 970 TENSOR_ASSERT(m>=0 && m<_dim[4],"6d bounds check failed dim=4",m,this); 971 TENSOR_ASSERT(n>=0 && n<_dim[5],"6d bounds check failed dim=5",n,this); 972 #endif 973 return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+ 974 l*_stride[3]+m*_stride[4]+n*_stride[5]]; 975 } 976 977 /// 6-d indexing operation \em without bounds checking. 
978 979 /// @param[in] i index for dimension 0 980 /// @param[in] j index for dimension 1 981 /// @param[in] k index for dimension 2 982 /// @param[in] l index for dimension 3 983 /// @param[in] m index for dimension 4 984 /// @param[in] n index for dimension 5 985 /// @return %Reference to element 986 const T& operator()(long i, long j, long k, long l, long m, long n) const { 987 #ifdef TENSOR_BOUNDS_CHECKING 988 TENSOR_ASSERT(i>=0 && i<_dim[0],"6d bounds check failed dim=0",i,this); 989 TENSOR_ASSERT(j>=0 && j<_dim[1],"6d bounds check failed dim=1",j,this); 990 TENSOR_ASSERT(k>=0 && k<_dim[2],"6d bounds check failed dim=2",k,this); 991 TENSOR_ASSERT(l>=0 && l<_dim[3],"6d bounds check failed dim=3",l,this); 992 TENSOR_ASSERT(m>=0 && m<_dim[4],"6d bounds check failed dim=4",m,this); 993 TENSOR_ASSERT(n>=0 && n<_dim[5],"6d bounds check failed dim=5",n,this); 994 #endif 995 return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+ 996 l*_stride[3]+m*_stride[4]+n*_stride[5]]; 997 } 998 999 /// Politically incorrect general indexing operation \em without bounds checking. 1000 1001 /// @param[in] ind Array containing index for each dimension 1002 /// @return %Reference to element 1003 T& operator()(const long ind[]) { 1004 long offset = 0; 1005 for (int d=0; d<_ndim; ++d) { 1006 long i = ind[d]; 1007 #ifdef TENSOR_BOUNDS_CHECKING 1008 TENSOR_ASSERT(i>=0 && i<_dim[0],"non-PC general indexing bounds check failed dim=",d,this); 1009 #endif 1010 offset += i*_stride[d]; 1011 } 1012 return _p[offset]; 1013 } 1014 1015 /// Politically incorrect general indexing operation \em without bounds checking. 
1016 1017 /// @param[in] ind Array containing index for each dimension 1018 /// @return %Reference to element 1019 const T& operator()(const long ind[]) const { 1020 long offset = 0; 1021 for (int d=0; d<_ndim; ++d) { 1022 long i = ind[d]; 1023 #ifdef TENSOR_BOUNDS_CHECKING 1024 TENSOR_ASSERT(i>=0 && i<_dim[0],"non-PC general indexing bounds check failed dim=",d,this); 1025 #endif 1026 offset += i*_stride[d]; 1027 } 1028 return _p[offset]; 1029 } 1030 1031 /// General indexing operation \em with bounds checking. 1032 1033 /// @param[in] ind Vector containing index for each dimension 1034 /// @return %Reference to element 1035 T& operator()(const std::vector<long> ind) { 1036 TENSOR_ASSERT(ind.size()>=(unsigned int) _ndim,"invalid number of dimensions",ind.size(),this); 1037 long index=0; 1038 for (long d=0; d<_ndim; ++d) { 1039 TENSOR_ASSERT(ind[d]>=0 && ind[d]<_dim[d],"out-of-bounds access",ind[d],this); 1040 index += ind[d]*_stride[d]; 1041 } 1042 return _p[index]; 1043 } 1044 1045 /// General indexing operation \em with bounds checking. 
1046 1047 /// @param[in] ind Vector containing index for each dimension 1048 /// @return %Reference to element 1049 const T& operator()(const std::vector<long> ind) const { 1050 TENSOR_ASSERT(ind.size()>=(unsigned int) _ndim,"invalid number of dimensions",ind.size(),this); 1051 long index=0; 1052 for (long d=0; d<_ndim; ++d) { 1053 TENSOR_ASSERT(ind[d]>=0 && ind[d]<_dim[d],"out-of-bounds access",ind[d],this); 1054 index += ind[d]*_stride[d]; 1055 } 1056 return _p[index]; 1057 } 1058 1059 /// General slicing operation 1060 1061 /// @param[in] s Vector containing slice for each dimension 1062 /// @return SliceTensor viewing patch of original tensor 1063 SliceTensor<T> operator()(const std::vector<Slice>& s) { 1064 TENSOR_ASSERT(s.size()>=(unsigned)(this->ndim()), "invalid number of dimensions", 1065 this->ndim(),this); 1066 return SliceTensor<T>(*this,&(s[0])); 1067 } 1068 1069 /// General slicing operation (const) 1070 1071 /// @param[in] s Vector containing slice for each dimension 1072 /// @return Constant Tensor viewing patch of original tensor 1073 const Tensor<T> operator()(const std::vector<Slice>& s) const { 1074 TENSOR_ASSERT(s.size()>=(unsigned)(this->ndim()), "invalid number of dimensions", 1075 this->ndim(),this); 1076 return SliceTensor<T>(*this,&(s[0])); 1077 } 1078 1079 /// Return a 1d SliceTensor that views the specified range of the 1d Tensor 1080 1081 /// @return SliceTensor viewing patch of original tensor 1082 SliceTensor<T> operator()(const Slice& s0) { 1083 TENSOR_ASSERT(this->ndim()==1,"invalid number of dimensions", 1084 this->ndim(),this); 1085 Slice s[1] = {s0}; 1086 return SliceTensor<T>(*this,s); 1087 } 1088 1089 /// Return a 1d SliceTensor that views the specified range of the 1d Tensor 1090 1091 /// @return Constant Tensor viewing patch of original tensor: \f$ R(*,*,\ldots) \rightarrow I(*,*,\ldots) \f$ 1092 const Tensor<T> operator()(const Slice& s0) const { 1093 TENSOR_ASSERT(this->ndim()==1,"invalid number of dimensions", 1094 
this->ndim(),this); 1095 Slice s[1] = {s0}; 1096 return SliceTensor<T>(*this,s); 1097 } 1098 1099 /// Return a 1d SliceTensor that views the specified range of the 2d Tensor 1100 1101 /// @return SliceTensor viewing patch of original tensor: \f$ R(*) \rightarrow I(i,*) \f$ 1102 SliceTensor<T> operator()(long i, const Slice& s1) { 1103 TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions", 1104 this->ndim(),this); 1105 Slice s[2] = {Slice(i,i,0),s1}; 1106 return SliceTensor<T>(*this,s); 1107 } 1108 1109 /// Return a 1d SliceTensor that views the specified range of the 2d Tensor 1110 1111 /// @return Constant Tensor viewing patch of original tensor 1112 const Tensor<T> operator()(long i, const Slice& s1) const { 1113 TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions", 1114 this->ndim(),this); 1115 Slice s[2] = {Slice(i,i,0),s1}; 1116 return SliceTensor<T>(*this,s); 1117 } 1118 1119 /// Return a 1d SliceTensor that views the specified range of the 2d Tensor 1120 1121 /// @return SliceTensor viewing patch of original tensor 1122 SliceTensor<T> operator()(const Slice& s0, long j) { 1123 TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions", 1124 this->ndim(),this); 1125 Slice s[2] = {s0,Slice(j,j,0)}; 1126 return SliceTensor<T>(*this,s); 1127 } 1128 1129 /// Return a 1d constant Tensor that views the specified range of the 2d Tensor 1130 1131 /// @return Constant Tensor viewing patch of original tensor 1132 const Tensor<T> operator()(const Slice& s0, long j) const { 1133 TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions", 1134 this->ndim(),this); 1135 Slice s[2] = {s0,Slice(j,j,0)}; 1136 return SliceTensor<T>(*this,s); 1137 } 1138 1139 /// Return a 2d SliceTensor that views the specified range of the 2d Tensor 1140 1141 /// @return SliceTensor viewing patch of original tensor 1142 SliceTensor<T> operator()(const Slice& s0, const Slice& s1) { 1143 TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions", 1144 this->ndim(),this); 1145 
Slice s[2] = {s0,s1}; 1146 return SliceTensor<T>(*this,s); 1147 } 1148 1149 /// Return a 2d constant Tensor that views the specified range of the 2d Tensor 1150 1151 /// @return Constant Tensor viewing patch of original tensor 1152 const Tensor<T> operator()(const Slice& s0, const Slice& s1) const { 1153 TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions", 1154 this->ndim(),this); 1155 Slice s[2] = {s0,s1}; 1156 return SliceTensor<T>(*this,s); 1157 } 1158 1159 /// Return a 3d SliceTensor that views the specified range of the 3d Tensor 1160 1161 /// @return SliceTensor viewing patch of original tensor 1162 SliceTensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2) { 1163 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1164 this->ndim(),this); 1165 Slice s[3] = {s0,s1,s2}; 1166 return SliceTensor<T>(*this,s); 1167 } 1168 1169 /// Return a 3d constant Tensor that views the specified range of the 3d Tensor 1170 1171 /// @return Constant Tensor viewing patch of original tensor 1172 const Tensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2) const { 1173 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1174 this->ndim(),this); 1175 Slice s[3] = {s0,s1,s2}; 1176 return SliceTensor<T>(*this,s); 1177 } 1178 1179 /// Return a 2d SliceTensor that views the specified range of the 3d Tensor 1180 1181 /// @return SliceTensor viewing patch of original tensor 1182 SliceTensor<T> operator()(long i, const Slice& s1, const Slice& s2) { 1183 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1184 this->ndim(),this); 1185 Slice s[3] = {Slice(i,i,0),s1,s2}; 1186 return SliceTensor<T>(*this,s); 1187 } 1188 1189 /// Return a 2d constant Tensor that views the specified range of the 3d Tensor 1190 1191 /// @return Constant Tensor viewing patch of original tensor 1192 const Tensor<T> operator()(long i, const Slice& s1, const Slice& s2) const { 1193 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 
1194 this->ndim(),this); 1195 Slice s[3] = {Slice(i,i,0),s1,s2}; 1196 return SliceTensor<T>(*this,s); 1197 } 1198 1199 /// Return a 2d SliceTensor that views the specified range of the 3d Tensor 1200 1201 /// @return SliceTensor viewing patch of original tensor 1202 SliceTensor<T> operator()(const Slice& s0, long j, const Slice& s2) { 1203 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1204 this->ndim(),this); 1205 Slice s[3] = {s0,Slice(j,j,0),s2}; 1206 return SliceTensor<T>(*this,s); 1207 } 1208 1209 /// Return a 2d constant Tensor that views the specified range of the 3d Tensor 1210 1211 /// @return Constant Tensor viewing patch of original tensor 1212 const Tensor<T> operator()(const Slice& s0, long j, const Slice& s2) const { 1213 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1214 this->ndim(),this); 1215 Slice s[3] = {s0,Slice(j,j,0),s2}; 1216 return SliceTensor<T>(*this,s); 1217 } 1218 1219 /// Return a 2d SliceTensor that views the specified range of the 3d Tensor 1220 1221 /// @return SliceTensor viewing patch of original tensor 1222 SliceTensor<T> operator()(const Slice& s0, const Slice& s1, long k) { 1223 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1224 this->ndim(),this); 1225 Slice s[3] = {s0,s1,Slice(k,k,0)}; 1226 return SliceTensor<T>(*this,s); 1227 } 1228 1229 /// Return a 2d constant Tensor that views the specified range of the 3d Tensor 1230 1231 /// @return Constant Tensor viewing patch of original tensor 1232 const Tensor<T> operator()(const Slice& s0, const Slice& s1, long k) const { 1233 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1234 this->ndim(),this); 1235 Slice s[3] = {s0,s1,Slice(k,k,0)}; 1236 return SliceTensor<T>(*this,s); 1237 } 1238 1239 /// Return a 1d SliceTensor that views the specified range of the 3d Tensor 1240 1241 /// @return SliceTensor viewing patch of original tensor 1242 SliceTensor<T> operator()(long i, long j, const Slice& s2) { 1243 
TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1244 this->ndim(),this); 1245 Slice s[3] = {Slice(i,i,0),Slice(j,j,0),s2}; 1246 return SliceTensor<T>(*this,s); 1247 } 1248 1249 /// Return a 1d constant Tensor that views the specified range of the 3d Tensor 1250 1251 /// @return Constant Tensor viewing patch of original tensor 1252 const Tensor<T> operator()(long i, long j, const Slice& s2) const { 1253 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1254 this->ndim(),this); 1255 Slice s[3] = {Slice(i,i,0),Slice(j,j,0),s2}; 1256 return SliceTensor<T>(*this,s); 1257 } 1258 1259 /// Return a 1d SliceTensor that views the specified range of the 3d Tensor 1260 1261 /// @return SliceTensor viewing patch of original tensor 1262 SliceTensor<T> operator()(long i, const Slice& s1, long k) { 1263 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1264 this->ndim(),this); 1265 Slice s[3] = {Slice(i,i,0),s1,Slice(k,k,0)}; 1266 return SliceTensor<T>(*this,s); 1267 } 1268 1269 /// Return a 1d constant Tensor that views the specified range of the 3d Tensor 1270 1271 /// @return Constant Tensor viewing patch of original tensor 1272 const Tensor<T> operator()(long i, const Slice& s1, long k) const { 1273 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1274 this->ndim(),this); 1275 Slice s[3] = {Slice(i,i,0),s1,Slice(k,k,0)}; 1276 return SliceTensor<T>(*this,s); 1277 } 1278 1279 /// Return a 1d SliceTensor that views the specified range of the 3d Tensor 1280 1281 /// @return SliceTensor viewing patch of original tensor 1282 SliceTensor<T> operator()(const Slice& s0, long j, long k) { 1283 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1284 this->ndim(),this); 1285 Slice s[3] = {s0,Slice(j,j,0),Slice(k,k,0)}; 1286 return SliceTensor<T>(*this,s); 1287 } 1288 1289 /// Return a 1d constant Tensor that views the specified range of the 3d Tensor 1290 1291 /// @return Constant Tensor viewing patch of original tensor 1292 
const Tensor<T> operator()(const Slice& s0, long j, long k) const { 1293 TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions", 1294 this->ndim(),this); 1295 Slice s[3] = {s0,Slice(j,j,0),Slice(k,k,0)}; 1296 return SliceTensor<T>(*this,s); 1297 } 1298 1299 /// Return a 1-4d SliceTensor that views the specified range of the 4d Tensor 1300 1301 /// @return SliceTensor viewing patch of original tensor 1302 SliceTensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2, 1303 const Slice& s3) { 1304 TENSOR_ASSERT(this->ndim()==4,"invalid number of dimensions", 1305 this->ndim(),this); 1306 Slice s[4] = {s0,s1,s2,s3}; 1307 return SliceTensor<T>(*this,s); 1308 } 1309 1310 /// Return a 1-4d constant Tensor that views the specified range of the 4d Tensor 1311 1312 /// @return Constant Tensor viewing patch of original tensor 1313 const Tensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2, 1314 const Slice& s3) const { 1315 TENSOR_ASSERT(this->ndim()==4,"invalid number of dimensions", 1316 this->ndim(),this); 1317 Slice s[4] = {s0,s1,s2,s3}; 1318 return SliceTensor<T>(*this,s); 1319 } 1320 1321 /// Return a 1-5d SliceTensor that views the specified range of the 5d Tensor 1322 1323 /// @return SliceTensor viewing patch of original tensor 1324 SliceTensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2, 1325 const Slice& s3, const Slice& s4) { 1326 TENSOR_ASSERT(this->ndim()==5,"invalid number of dimensions", 1327 this->ndim(),this); 1328 Slice s[5] = {s0,s1,s2,s3,s4}; 1329 return SliceTensor<T>(*this,s); 1330 } 1331 1332 /// Return a 1-5d constant Tensor that views the specified range of the 5d Tensor 1333 1334 /// @return Constant Tensor viewing patch of original tensor 1335 const Tensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2, 1336 const Slice& s3, const Slice& s4) const { 1337 TENSOR_ASSERT(this->ndim()==5,"invalid number of dimensions", 1338 this->ndim(),this); 1339 Slice s[5] = {s0,s1,s2,s3,s4}; 
1340 return SliceTensor<T>(*this,s); 1341 } 1342 1343 /// Return a 1-6d SliceTensor that views the specified range of the 6d Tensor 1344 1345 /// @return SliceTensor viewing patch of original tensor 1346 SliceTensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2, 1347 const Slice& s3, const Slice& s4, const Slice& s5) { 1348 TENSOR_ASSERT(this->ndim()==6,"invalid number of dimensions", 1349 this->ndim(),this); 1350 Slice s[6] = {s0,s1,s2,s3,s4,s5}; 1351 return SliceTensor<T>(*this,s); 1352 } 1353 1354 1355 /// Return a 1-6d constant Tensor that views the specified range of the 6d Tensor 1356 1357 /// @return Constant Tensor viewing patch of original tensor 1358 const Tensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2, 1359 const Slice& s3, const Slice& s4, const Slice& s5) const { 1360 TENSOR_ASSERT(this->ndim()==6,"invalid number of dimensions", 1361 this->ndim(),this); 1362 Slice s[6] = {s0,s1,s2,s3,s4,s5}; 1363 return SliceTensor<T>(*this,s); 1364 } 1365 1366 /// Returns new view/tensor reshaping size/number of dimensions to conforming tensor 1367 1368 /// @param[in] ndimnew Number of dimensions in the result 1369 /// @param[in] d Array containing size of each new dimension 1370 /// @return New tensor (viewing same underlying data as the original but with different shape) 1371 Tensor<T> reshape(int ndimnew, const long* d) { 1372 Tensor<T> result(*this); 1373 result.reshape_inplace(ndimnew,d); 1374 return result; 1375 } 1376 1377 /// Returns new view/tensor reshaping size/number of dimensions to conforming tensor 1378 1379 /// @param[in] ndimnew Number of dimensions in the result 1380 /// @param[in] d Array containing size of each new dimension 1381 /// @return New tensor (viewing same underlying data as the original but with different shape) 1382 const Tensor<T> reshape(int ndimnew, const long* d) const { 1383 Tensor<T> result(*const_cast<Tensor<T>*>(this)); 1384 result.reshape_inplace(ndimnew,d); 1385 return result; 1386 } 
1387 1388 /// Returns new view/tensor reshaping size/number of dimensions to conforming tensor 1389 1390 /// @param[in] d Array containing size of each new dimension 1391 /// @return New tensor (viewing same underlying data as the original but with different shape) 1392 Tensor<T> reshape(const std::vector<long>& d) { 1393 return reshape(d.size(), d.size() ? &d[0] : 0); 1394 } 1395 1396 /// Returns new view/tensor reshaping size/number of dimensions to conforming tensor 1397 1398 /// @param[in] d Array containing size of each new dimension 1399 /// @return New tensor (viewing same underlying data as the original but with different shape) 1400 const Tensor<T> reshape(const std::vector<long>& d) const { 1401 return reshape(d.size(), d.size() ? &d[0] : 0); 1402 } 1403 1404 /// Returns new view/tensor rehapings to conforming 1-d tensor with given dimension 1405 1406 /// @param[in] dim0 Size of new dimension 0 1407 /// @return New tensor (viewing same underlying data as the original but with different shape) 1408 Tensor<T> reshape(long dim0) { 1409 long d[1] = {dim0}; 1410 return reshape(1,d); 1411 } 1412 /// Returns new view/tensor rehapings to conforming 1-d tensor with given dimension 1413 1414 /// @param[in] dim0 Size of new dimension 0 1415 /// @return New tensor (viewing same underlying data as the original but with different shape) 1416 const Tensor<T> reshape(long dim0) const { 1417 long d[1] = {dim0}; 1418 return reshape(1,d); 1419 } 1420 1421 /// Returns new view/tensor rehaping to conforming 2-d tensor with given dimensions 1422 1423 /// @param[in] dim0 Size of new dimension 0 1424 /// @param[in] dim1 Size of new dimension 1 1425 /// @return New tensor (viewing same underlying data as the original but with different shape) 1426 Tensor<T> reshape(long dim0, long dim1) { 1427 long d[2] = {dim0,dim1}; 1428 return reshape(2,d); 1429 } 1430 1431 /// Returns new view/tensor rehaping to conforming 2-d tensor with given dimensions 1432 1433 /// @param[in] dim0 Size of 
new dimension 0 1434 /// @param[in] dim1 Size of new dimension 1 1435 /// @return New tensor (viewing same underlying data as the original but with different shape) 1436 const Tensor<T> reshape(long dim0, long dim1) const { 1437 long d[2] = {dim0,dim1}; 1438 return reshape(2,d); 1439 } 1440 1441 /// Returns new view/tensor rehaping to conforming 3-d tensor with given dimensions 1442 1443 /// @param[in] dim0 Size of new dimension 0 1444 /// @param[in] dim1 Size of new dimension 1 1445 /// @param[in] dim2 Size of new dimension 2 1446 /// @return New tensor (viewing same underlying data as the original but with different shape) 1447 Tensor<T> reshape(long dim0, long dim1, long dim2) { 1448 long d[3] = {dim0,dim1,dim2}; 1449 return reshape(3,d); 1450 } 1451 1452 /// Returns new view/tensor rehaping to conforming 3-d tensor with given dimensions 1453 1454 /// @param[in] dim0 Size of new dimension 0 1455 /// @param[in] dim1 Size of new dimension 1 1456 /// @param[in] dim2 Size of new dimension 2 1457 /// @return New tensor (viewing same underlying data as the original but with different shape) 1458 const Tensor<T> reshape(long dim0, long dim1, long dim2) const { 1459 long d[3] = {dim0,dim1,dim2}; 1460 return reshape(3,d); 1461 } 1462 1463 /// Returns new view/tensor rehaping to conforming 4-d tensor with given dimensions 1464 1465 /// @param[in] dim0 Size of new dimension 0 1466 /// @param[in] dim1 Size of new dimension 1 1467 /// @param[in] dim2 Size of new dimension 2 1468 /// @param[in] dim3 Size of new dimension 3 1469 /// @return New tensor (viewing same underlying data as the original but with different shape) 1470 Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3) { 1471 long d[4] = {dim0,dim1,dim2,dim3}; 1472 return reshape(4,d); 1473 } 1474 1475 /// Returns new view/tensor rehaping to conforming 4-d tensor with given dimensions 1476 1477 /// @param[in] dim0 Size of new dimension 0 1478 /// @param[in] dim1 Size of new dimension 1 1479 /// @param[in] 
dim2 Size of new dimension 2 1480 /// @param[in] dim3 Size of new dimension 3 1481 /// @return New tensor (viewing same underlying data as the original but with different shape) 1482 const Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3) const { 1483 long d[4] = {dim0,dim1,dim2,dim3}; 1484 return reshape(4,d); 1485 } 1486 1487 /// Returns new view/tensor rehaping to conforming 5-d tensor with given dimensions 1488 1489 /// @param[in] dim0 Size of new dimension 0 1490 /// @param[in] dim1 Size of new dimension 1 1491 /// @param[in] dim2 Size of new dimension 2 1492 /// @param[in] dim3 Size of new dimension 3 1493 /// @param[in] dim4 Size of new dimension 4 1494 /// @return New tensor (viewing same underlying data as the original but with different shape) 1495 Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3, long dim4) { 1496 long d[5] = {dim0,dim1,dim2,dim3,dim4}; 1497 return reshape(5,d); 1498 } 1499 1500 /// Returns new view/tensor rehaping to conforming 5-d tensor with given dimensions 1501 1502 /// @param[in] dim0 Size of new dimension 0 1503 /// @param[in] dim1 Size of new dimension 1 1504 /// @param[in] dim2 Size of new dimension 2 1505 /// @param[in] dim3 Size of new dimension 3 1506 /// @param[in] dim4 Size of new dimension 4 1507 /// @return New tensor (viewing same underlying data as the original but with different shape) 1508 const Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3, long dim4) const { 1509 long d[5] = {dim0,dim1,dim2,dim3,dim4}; 1510 return reshape(5,d); 1511 } 1512 1513 /// Returns new view/tensor rehaping to conforming 6-d tensor with given dimensions 1514 1515 /// @param[in] dim0 Size of new dimension 0 1516 /// @param[in] dim1 Size of new dimension 1 1517 /// @param[in] dim2 Size of new dimension 2 1518 /// @param[in] dim3 Size of new dimension 3 1519 /// @param[in] dim4 Size of new dimension 4 1520 /// @param[in] dim5 Size of new dimension 5 1521 /// @return New tensor (viewing same underlying data 
/// @return New tensor (viewing same underlying data as the original but with different shape)
Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3, long dim4, long dim5) {
    long d[6] = {dim0,dim1,dim2,dim3,dim4,dim5};
    return reshape(6,d);
}

/// Returns new view/tensor reshaping to conforming 6-d tensor with given dimensions

/// @param[in] dim0 Size of new dimension 0
/// @param[in] dim1 Size of new dimension 1
/// @param[in] dim2 Size of new dimension 2
/// @param[in] dim3 Size of new dimension 3
/// @param[in] dim4 Size of new dimension 4
/// @param[in] dim5 Size of new dimension 5
/// @return New tensor (viewing same underlying data as the original but with different shape)
const Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3, long dim4, long dim5) const {
    long d[6] = {dim0,dim1,dim2,dim3,dim4,dim5};
    return reshape(6,d);
}

/// Returns new view/tensor reshaping to flat (1-d) tensor of _size elements
Tensor<T> flat() {
    long d[1] = {_size};
    return reshape(1,d);
}

/// Returns new view/tensor reshaping to flat (1-d) tensor of _size elements
const Tensor<T> flat() const {
    long d[1] = {_size};
    return reshape(1,d);
}

/// Returns new view/tensor splitting dimension \c i as \c dimi0*dimi1 to produce conforming d+1 dimension tensor

/// @return New tensor (viewing same underlying data as the original but with additional dimensions)
Tensor<T> splitdim(long i, long dimi0, long dimi1) {
    // Shallow copy of the view; only the shape metadata changes.
    Tensor<T> result(*this);
    result.splitdim_inplace(i, dimi0, dimi1);
    return result;
}

/// Returns new view/tensor splitting dimension \c i as \c dimi0*dimi1 to produce conforming d+1 dimension tensor

/// @return New tensor (viewing same underlying data as the original but with additional dimensions)
const Tensor<T> splitdim(long i, long dimi0, long dimi1) const {
    // const_cast required because the shallow-copy constructor takes a non-const reference.
    Tensor<T> result(*const_cast<Tensor<T>*>(this));
    result.splitdim_inplace(i, dimi0, dimi1);
    return result;
}

/// Returns new view/tensor fusing contiguous dimensions \c i and \c i+1

/// @return New tensor (viewing same underlying data as the original but with fewer dimensions)
Tensor<T> fusedim(long i) {
    Tensor<T> result(*this);
    result.fusedim_inplace(i);
    return result;
}

/// Returns new view/tensor fusing contiguous dimensions \c i and \c i+1

/// @return New tensor (viewing same underlying data as the original but with fewer dimensions)
const Tensor<T> fusedim(long i) const {
    Tensor<T> result(*const_cast<Tensor<T>*>(this));
    result.fusedim_inplace(i);
    return result;
}

/// Returns new view/tensor swapping dimensions \c idim and \c jdim

/// @return New tensor (viewing same underlying data as the original but with reordered dimensions)
Tensor<T> swapdim(long idim, long jdim) {
    Tensor<T> result(*this);
    result.swapdim_inplace(idim, jdim);
    return result;
}

/// Returns new view/tensor swapping dimensions \c idim and \c jdim

/// @return New tensor (viewing same underlying data as the original but with reordered dimensions)
const Tensor<T> swapdim(long idim, long jdim) const {
    Tensor<T> result(*const_cast<Tensor<T>*>(this));
    result.swapdim_inplace(idim, jdim);
    return result;
}

/// Returns new view/tensor permuting the dimensions

/// @param[in] map Old dimension i becomes new dimension \c map[i]
/// @return New tensor (viewing same underlying data as the original but with reordered dimensions)
Tensor<T> mapdim(const std::vector<long>& map) {
    Tensor<T> result(*this);
    result.mapdim_inplace(map);
    return result;
}

/// Returns new view/tensor permuting the dimensions

/// @param[in] map Old dimension i becomes new dimension \c map[i]
/// @return New tensor (viewing same underlying data as the original but with reordered dimensions)
const Tensor<T> mapdim(const std::vector<long>& map) const {
    Tensor<T> result(*const_cast<Tensor<T>*>(this));
    result.mapdim_inplace(map);
    return result;
}


/// Returns new view/tensor cycling the sub-dimensions `(start,...,end)` with `nshift` steps
Tensor<T> cycledim(long nshift, long start, long end) {
    Tensor<T> result(*this);
    result.cycledim_inplace(nshift, start, end);
    return result;
}


/// Returns new view/tensor cycling the sub-dimensions `(start,...,end)` with `nshift` steps
const Tensor<T> cycledim(long nshift, long start, long end) const {
    Tensor<T> result(*const_cast<Tensor<T>*>(this));
    result.cycledim_inplace(nshift, start, end);
    return result;
}


/// Test if \c *this and \c t conform (delegates to the BaseTensor shape comparison).
template <class Q> bool conforms(const Tensor<Q>& t) const {
    return BaseTensor::conforms(&t);
}

/// Returns the sum of all elements of the tensor
T sum() const {
    T result = 0;
    UNARY_OPTIMIZED_ITERATOR(const T,(*this),result += *_p0);
    return result;
}

/// Returns the sum of the squares of the elements

/// NOTE(review): this accumulates (*p)*(*p), i.e. the plain square, not
/// the modulus squared — for complex T the result is complex. Confirm
/// callers expect this rather than |z|^2.
T sumsq() const {
    T result = 0;
    UNARY_OPTIMIZED_ITERATOR(const T,(*this),result += (*_p0) * (*_p0));
    return result;
}

/// Return the product of all elements of the tensor
T product() const {
    T result = 1;
    UNARY_OPTIMIZED_ITERATOR(const T,(*this),result *= *_p0);
    return result;
}

/// Return the minimum value (and if ind is non-null, its index) in the Tensor

/// @param[in] ind If non-null, an array of ndim() longs that receives the index of the minimum
T min(long* ind=0) const {
    // Seed with the first element so the comparisons below are well defined.
    T result = *(this->_p);
    if (ind) {
        // Unoptimized iterator preserves the logical dimension order so
        // iter.ind[] plus the inner-loop counter _j give the full index.
        for (long i=0; i<_ndim; ++i) ind[i]=0;
        long nd = _ndim-1;
        UNARY_UNOPTIMIZED_ITERATOR(const T,(*this),
            if (result > *_p0) {
                result = *_p0;
                for (long i=0; i<nd; ++i) ind[i]=iter.ind[i];
                ind[nd] = _j;
            }
            );
    }
    else {
        UNARY_OPTIMIZED_ITERATOR(const T,(*this),result=std::min<T>(result,*_p0));
    }
    return result;
}

/// Return the maximum value (and if ind is non-null, its index) in the Tensor

/// @param[in] ind If non-null, an array of ndim() longs that receives the index of the maximum
T max(long* ind=0) const {
    T result = *(this->_p);
    if (ind) {
        for (long i=0; i<_ndim; ++i) ind[i]=0;
        long nd = _ndim-1;
        UNARY_UNOPTIMIZED_ITERATOR(const T,(*this),
            if (result < *_p0) {
                result = *_p0;
                for (long i=0; i<nd; ++i) ind[i]=iter.ind[i];
                ind[nd] = _j;
            }
            );
    }
    else {
        UNARY_OPTIMIZED_ITERATOR(const T,(*this),result=std::max<T>(result,*_p0));
    }
    return result;
}

// For complex types, this next group returns the appropriate real type
// For real types, the same type as T is returned (type_data.h)

/// Returns the Frobenius norm of the tensor (sqrt of sum of squared moduli)
float_scalar_type normf() const {
    float_scalar_type result = 0;
    UNARY_OPTIMIZED_ITERATOR(const T,(*this),result += ::madness::detail::mynorm(*_p0));
    return (float_scalar_type) std::sqrt(result);
}

/// Return the absolute minimum value (and if ind is non-null, its index) in the Tensor

/// @param[in] ind If non-null, an array of ndim() longs that receives the index of the minimum
scalar_type absmin(long *ind = 0) const {
    scalar_type result = std::abs(*(this->_p));
    if (ind) {
        for (long i=0; i<_ndim; ++i) ind[i]=0;
        long nd = _ndim-1;
        UNARY_UNOPTIMIZED_ITERATOR(const T,(*this),
            scalar_type absval = std::abs(*_p0);
            if (result > absval) {
                result = absval;
                for (long i=0; i<nd; ++i) ind[i]=iter.ind[i];
                ind[nd] = _j;
            }
            );
    }
    else {
        UNARY_OPTIMIZED_ITERATOR(const T,(*this),result=std::min<scalar_type>(result,std::abs(*_p0)));
    }
    return result;
}

/// Return the absolute maximum value (and if ind is non-null, its index) in the Tensor

/// @param[in] ind If non-null, an array of ndim() longs that receives the index of the maximum
scalar_type absmax(long *ind = 0) const {
    scalar_type result = std::abs(*(this->_p));
    if (ind) {
        for (long i=0; i<_ndim; ++i) ind[i]=0;
        long nd = _ndim-1;
        // NOTE(review): absmin above iterates with (const T,...) while this
        // uses (T,...) — looks like an unintended inconsistency; confirm
        // against the iterator macro definitions before changing.
        UNARY_UNOPTIMIZED_ITERATOR(T,(*this),
            scalar_type absval = std::abs(*_p0);
            if (result < absval) {
                result = absval;
                for (long i=0; i<nd; ++i) ind[i]=iter.ind[i];
                ind[nd] = _j;
            }
            );
    }
    else {
        UNARY_OPTIMIZED_ITERATOR(const T,(*this),result=std::max<scalar_type>(result,std::abs(*_p0)));
    }
    return result;
}


/// Return the trace of two tensors (no complex conjugate invoked)
T trace(const Tensor<T>& t) const {
    T result = 0;
    BINARY_OPTIMIZED_ITERATOR(const T,(*this),const T,t,result += (*_p0)*(*_p1));
    return result;
}

/// Return the trace of two tensors with complex conjugate of the leftmost (i.e., this)
template <class Q>
TENSOR_RESULT_TYPE(T,Q) trace_conj(const Tensor<Q>& t) const {
    TENSOR_RESULT_TYPE(T,Q) result = 0;
    BINARY_OPTIMIZED_ITERATOR(const T,(*this),const Q,t,result += conditional_conj(*_p0)*(*_p1));
    return result;
}

/// Inplace apply a unary function to each element of the tensor

/// @param[in] op Callable mapping T to T; applied elementwise
/// @return %Reference to this tensor
template <typename opT>
Tensor<T>& unaryop(opT& op) {
    UNARY_OPTIMIZED_ITERATOR(T,(*this),*_p0=op(*_p0));
    return *this;
}

/// Inplace multiply by corresponding elements of argument Tensor

/// @param[in] t Conforming tensor whose elements scale this one
/// @return %Reference to this tensor
Tensor<T>& emul(const Tensor<T>& t) {
    BINARY_OPTIMIZED_ITERATOR(T,(*this),const T,t,*_p0 *= *_p1);
    return *this;
}

/// Inplace generalized saxpy ...
this = this*alpha + other*beta 1791 Tensor<T>& gaxpy(T alpha, const Tensor<T>& t, T beta) { 1792 if (iscontiguous() && t.iscontiguous()) { 1793 T* MADNESS_RESTRICT a = ptr(); 1794 const T* MADNESS_RESTRICT b = t.ptr(); 1795 if (alpha == T(1.0)) { 1796 for (long i=0; i<_size; ++i) a[i] += b[i]*beta; 1797 } 1798 else { 1799 for (long i=0; i<_size; ++i) a[i] = a[i]*alpha + b[i]*beta; 1800 } 1801 } 1802 else { 1803 //BINARYITERATOR(T,(*this),T,t, (*_p0) = alpha*(*_p0) + beta*(*_p1)); 1804 BINARY_OPTIMIZED_ITERATOR(T,(*this),const T,t, (*_p0) = alpha*(*_p0) + beta*(*_p1)); 1805 //ITERATOR((*this),(*this)(IND) = alpha*(*this)(IND) + beta*t(IND)); 1806 } 1807 return *this; 1808 } 1809 1810 /// Returns a pointer to the internal data 1811 T* ptr() { 1812 return _p; 1813 } 1814 1815 /// Returns a pointer to the internal data 1816 const T* ptr() const { 1817 return _p; 1818 } 1819 1820 /// Returns a pointer to the base class 1821 BaseTensor* base() { 1822 return static_cast<BaseTensor*>(this); 1823 } 1824 1825 /// Returns a pointer to the base class 1826 const BaseTensor* base() const { 1827 return static_cast<BaseTensor*>(this); 1828 } 1829 1830 /// Return iterator over single tensor 1831 TensorIterator<T> unary_iterator(long iterlevel=0, 1832 bool optimize=true, 1833 bool fusedim=true, 1834 long jdim=default_jdim) const { 1835 return TensorIterator<T>(this,(const Tensor<T>*) 0, (const Tensor<T>*) 0, 1836 iterlevel, optimize, fusedim, jdim); 1837 } 1838 1839 /// Return iterator over two tensors 1840 template <class Q> 1841 TensorIterator<T,Q> binary_iterator(const Tensor<Q>& q, 1842 long iterlevel=0, 1843 bool optimize=true, 1844 bool fusedim=true, 1845 long jdim=default_jdim) const { 1846 return TensorIterator<T,Q>(this,&q,(const Tensor<T>*) 0, 1847 iterlevel, optimize, fusedim, jdim); 1848 } 1849 1850 /// Return iterator over three tensors 1851 template <class Q, class R> 1852 TensorIterator<T,Q,R> ternary_iterator(const Tensor<Q>& q, 1853 const Tensor<R>& r, 1854 long 
iterlevel=0, 1855 bool optimize=true, 1856 bool fusedim=true, 1857 long jdim=default_jdim) const { 1858 return TensorIterator<T,Q,R>(this,&q,&r, 1859 iterlevel, optimize, fusedim, jdim); 1860 } 1861 1862 /// End point for forward iteration 1863 const TensorIterator<T>& end() const { 1864 static TensorIterator<T> theend(0,0,0,0,0,0); 1865 return theend; 1866 } 1867 1868 virtual ~Tensor() {} 1869 1870 /// Frees all memory and resests to state of default constructor 1871 void clear() {deallocate();} 1872 1873 bool has_data() const {return size()!=0;}; 1874 1875 }; 1876 1877 template <class T> 1878 std::ostream& operator << (std::ostream& out, const Tensor<T>& t); 1879 1880 1881 namespace archive { 1882 /// Serialize a tensor 1883 template <class Archive, typename T> 1884 struct ArchiveStoreImpl< Archive, Tensor<T> > { 1885 static void store(const Archive& s, const Tensor<T>& t) { 1886 if (t.iscontiguous()) { 1887 s & t.size() & t.id(); 1888 if (t.size()) s & t.ndim() & wrap(t.dims(),TENSOR_MAXDIM) & wrap(t.ptr(),t.size()); 1889 } 1890 else { 1891 s & copy(t); 1892 } 1893 }; 1894 }; 1895 1896 1897 /// Deserialize a tensor ... existing tensor is replaced 1898 template <class Archive, typename T> 1899 struct ArchiveLoadImpl< Archive, Tensor<T> > { 1900 static void load(const Archive& s, Tensor<T>& t) { 1901 long sz = 0l, id = 0l; 1902 s & sz & id; 1903 if (id != t.id()) throw "type mismatch deserializing a tensor"; 1904 if (sz) { 1905 long _ndim = 0l, _dim[TENSOR_MAXDIM]; 1906 s & _ndim & wrap(_dim,TENSOR_MAXDIM); 1907 t = Tensor<T>(_ndim, _dim, false); 1908 if (sz != t.size()) throw "size mismatch deserializing a tensor"; 1909 s & wrap(t.ptr(), t.size()); 1910 } 1911 else { 1912 t = Tensor<T>(); 1913 } 1914 }; 1915 }; 1916 1917 } 1918 1919 /// The class defines tensor op scalar ... here define scalar op tensor. 
1920 1921 /// \ingroup tensor 1922 template <typename T, typename Q> 1923 typename IsSupported < TensorTypeData<Q>, Tensor<T> >::type 1924 operator+(Q x, const Tensor<T>& t) { 1925 return t+x; 1926 } 1927 1928 /// The class defines tensor op scalar ... here define scalar op tensor. 1929 1930 /// \ingroup tensor 1931 template <typename T, typename Q> 1932 typename IsSupported < TensorTypeData<Q>, Tensor<T> >::type 1933 operator*(const Q& x, const Tensor<T>& t) { 1934 return t*x; 1935 } 1936 1937 /// The class defines tensor op scalar ... here define scalar op tensor. 1938 1939 /// \ingroup tensor 1940 template <typename T, typename Q> 1941 typename IsSupported < TensorTypeData<Q>, Tensor<T> >::type 1942 operator-(Q x, const Tensor<T>& t) { 1943 return (-t)+=x; 1944 } 1945 1946 /// Returns a new contiguous tensor that is a deep copy of the input 1947 1948 /// \ingroup tensor 1949 /// @result Returns a new contiguous tensor that is a deep copy of the input 1950 template <class T> Tensor<T> copy(const Tensor<T>& t) { 1951 if (t.size()) { 1952 Tensor<T> result = Tensor<T>(t.ndim(),t.dims(),false); 1953 BINARY_OPTIMIZED_ITERATOR(T, result, const T, t, *_p0 = *_p1); 1954 return result; 1955 } 1956 else { 1957 return Tensor<T>(); 1958 } 1959 } 1960 1961 /// Returns a new contiguous tensor of type Q that is a deep copy of the input 1962 1963 /// \ingroup tensor 1964 /// @result Returns a new contiguous tensor that is a deep copy of the input 1965 template <class Q, class T> 1966 Tensor<Q> convert(const Tensor<T>& t) { 1967 if (t.size()) { 1968 Tensor<Q> result = Tensor<Q>(t.ndim(),t.dims(),false); 1969 BINARY_OPTIMIZED_ITERATOR(Q, result, const T, t, *_p0 = *_p1); 1970 return result; 1971 } 1972 else { 1973 return Tensor<Q>(); 1974 } 1975 } 1976 1977 1978 /// Transforms one dimension of the tensor t by the matrix c, returns new contiguous tensor 1979 1980 /// \ingroup tensor 1981 /// \code 1982 /// transform_dir(t,c,1) = r(i,j,k,...) = sum(j') t(i,j',k,...) 
* c(j',j) 1983 /// \endcode 1984 /// @param[in] t Tensor to transform (size of dimension to be transformed must match size of first dimension of \c c ) 1985 /// @param[in] c Matrix used for the transformation 1986 /// @param[in] axis Dimension (or axis) to be transformed 1987 /// @result Returns a new, contiguous tensor 1988 template <class T, class Q> 1989 Tensor<TENSOR_RESULT_TYPE(T,Q)> transform_dir(const Tensor<T>& t, const Tensor<Q>& c, int axis) { 1990 if (axis == 0) { 1991 return inner(c,t,0,axis); 1992 } 1993 else if (axis == t.ndim()-1) { 1994 return inner(t,c,axis,0); 1995 } 1996 else { 1997 return copy(inner(t,c,axis,0).cycledim(1,axis, -1)); // Copy to make contiguous 1998 } 1999 } 2000 2001 /// Returns a new deep copy of the transpose of the input tensor 2002 2003 /// \ingroup tensor 2004 template <class T> 2005 Tensor<T> transpose(const Tensor<T>& t) { 2006 TENSOR_ASSERT(t.ndim() == 2, "transpose requires a matrix", t.ndim(), &t); 2007 return copy(t.swapdim(0,1)); 2008 } 2009 2010 /// Returns a new deep copy of the complex conjugate transpose of the input tensor 2011 2012 /// \ingroup tensor 2013 template <class T> 2014 Tensor<T> conj_transpose(const Tensor<T>& t) { 2015 TENSOR_ASSERT(t.ndim() == 2, "conj_transpose requires a matrix", t.ndim(), &t); 2016 return conj(t.swapdim(0,1)); 2017 } 2018 2019 /// Indexing a non-constant tensor with slices returns a SliceTensor 2020 2021 /// \ingroup tensor 2022 /// A slice tensor differs from a tensor only in that assignment 2023 /// causes the data to be copied rather than entire new copy 2024 /// generated. You will usually not instantiate one except as a 2025 /// temporary produced by indexing a tensor with slice and then 2026 /// assigning it back to a tensor, or performing some other 2027 /// operation and discarding. 
template <class T> class SliceTensor : public Tensor<T> {
private:
    SliceTensor<T>();  // Default construction is forbidden; a slice always views an existing tensor

public:
    /// Construct a view of \c t restricted by the slices \c s (one Slice per dimension of \c t )
    SliceTensor(const Tensor<T>& t, const Slice s[])
        : Tensor<T>(const_cast<Tensor<T>&>(t)) //!!!!!!!!!!!
    {
        // C++ standard says class derived from parameterized base class cannot
        // directly access the base class elements ... must explicitly reference.

        // nd counts the dimensions surviving the slice (step==0 collapses a
        // dimension); size accumulates the product of the surviving lengths.
        long nd = 0, size=1;
        for (long i=0; i<t._ndim; ++i) {
            long start=s[i].start, end=s[i].end, step=s[i].step;
            //std::printf("%ld input start=%ld end=%ld step=%ld\n",
            //i, start, end, step);
            // Negative bounds count backward from the end of the dimension
            if (start < 0) start += this->_dim[i];
            if (end < 0) end += this->_dim[i];
            long len = end-start+1;
            if (step) len /= step;	// Rounds len towards zero

            // if input length is not exact multiple of step, round end towards start
            // for the same behaviour of for (i=start; i<=end; i+=step);
            end = start + (len-1)*step;

            //std::printf("%ld munged start=%ld end=%ld step=%ld len=%ld _dim=%ld\n",
            //		i, start, end, step, len, this->_dim[i]);

            TENSOR_ASSERT(start>=0 && start<this->_dim[i],"slice start invalid",start,this);
            TENSOR_ASSERT(end>=0 && end<this->_dim[i],"slice end invalid",end,this);
            TENSOR_ASSERT(len>0,"slice length must be non-zero",len,this);

            // Advance the data pointer to the first element of the slice
            this->_p += start * t._stride[i];

            if (step) {
                size *= len;
                this->_dim[nd] = len;
                this->_stride[nd] = step * t._stride[i];
                ++nd;
            }
        }
        //For Python interface need to be able to return a scalar inside a tensor with nd=0
        //TENSOR_ASSERT(nd>0,"slicing produced a scalar, but cannot return one",nd,this);
        for (long i=nd; i<TENSOR_MAXDIM; ++i) { // So can iterate over missing dimensions
            this->_dim[i] = 1;
            this->_stride[i] = 0;
        }

        this->_ndim = nd;
        this->_size = size;
    }

    /// Assignment copies element values into the viewed data (deep copy, not rebinding)
    SliceTensor<T>& operator=(const SliceTensor<T>& t) {
        BINARY_OPTIMIZED_ITERATOR(T, (*this), const T, t, *_p0 = (T)(*_p1));
        return *this;
    }

    /// Assignment from a slice of another element type; elements are cast to T
    template <class Q>
    SliceTensor<T>& operator=(const SliceTensor<Q>& t) {
        BINARY_OPTIMIZED_ITERATOR(T, (*this), const Q, t, *_p0 = (T)(*_p1));
        return *this;
    }

    /// Assignment from a tensor copies element values into the viewed data
    SliceTensor<T>& operator=(const Tensor<T>& t) {
        BINARY_OPTIMIZED_ITERATOR(T, (*this), const T, t, *_p0 = (T)(*_p1));
        return *this;
    }

    /// Assignment from a tensor of another element type; elements are cast to T
    template <class Q>
    SliceTensor<T>& operator=(const Tensor<Q>& t) {
        BINARY_OPTIMIZED_ITERATOR(T, (*this), const Q, t, *_p0 = (T)(*_p1));
        return *this;
    }

    /// Assigning a scalar fills every element of the slice with it
    SliceTensor<T>& operator=(const T& t) {
        UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 = t);
        return *this;
    }

    virtual ~SliceTensor() {};		// Tensor<T> destructor does enough
};


// Specializations for complex types
// (operator< is undefined for std::complex, so min/max need dedicated definitions)
template<> float_complex Tensor<float_complex>::min(long* ind) const ;
template<> double_complex Tensor<double_complex>::min(long* ind) const ;
template<> float_complex Tensor<float_complex>::max(long* ind) const ;
template<> double_complex Tensor<double_complex>::max(long* ind) const ;

// Stream stuff

/// Print (for human consumption) a tensor to the stream

/// \ingroup tensor
/// Each row of output is prefixed with the leading indices in brackets,
/// e.g. "[ 0, 1,*]", followed by the values along the fastest dimension.
/// Stream format flags, precision and width are saved and restored.
template <class T>
std::ostream& operator << (std::ostream& s, const Tensor<T>& t) {
    if (t.size() == 0) {
        s << "[empty tensor]\n";
        return s;
    }

    // Width of the printed index fields, derived from the largest
    // dimension excluding the last (the last is printed as '*').
    long maxdim = 0;
    long index_width = 0;
    for (int i = 0; i<(t.ndim()-1); ++i) {
        if (maxdim < t.dim(i)) maxdim = t.dim(i);
    }
    if (maxdim < 10)
        index_width = 1;
    else if (maxdim < 100)
        index_width = 2;
    else if (maxdim < 1000)
        index_width = 3;
    else if (maxdim < 10000)
        index_width = 4;
    else
        index_width = 6;

    // Remember stream state so it can be restored before returning
    std::ios::fmtflags oldflags = s.setf(std::ios::scientific);
    long oldprec = s.precision();
    long oldwidth = s.width();

    // C++ formatted IO is worse than Fortran !!
    // iterlevel=1 yields one iteration per innermost row; optimize and
    // fusedim are disabled so the printed index order matches the dims.
    for (TensorIterator<T> iter=t.unary_iterator(1,false,false); iter!=t.end(); ++iter) {
        const T* p = iter._p0;
        long inc = iter._s0;
        long dimj = iter.dimj;
        s.unsetf(std::ios::scientific);
        s << '[';
        for (long i=0; i<iter.ndim; ++i) {
            s.width(index_width);
            s << iter.ind[i];
            //if (i != iter.ndim)
            s << ",";
        }
        s << "*]";
//flo        s.setf(std::ios::scientific);
        s.setf(std::ios::fixed);
        for (long j=0; j<dimj; ++j, p+=inc) {
//flo            s.precision(4);
            s << " ";
            s.precision(8);
            s.width(12);
            s << *p;
        }
        s.unsetf(std::ios::scientific);
        s << std::endl;
    }
    s.setf(oldflags,std::ios::floatfield);
    s.precision(oldprec);
    s.width(oldwidth);

    return s;
}


/// Outer product ... result(i,j,...,p,q,...) = left(i,k,...)*right(p,q,...)

/// \ingroup tensor
/// The result is a new contiguous tensor whose dimensions are the
/// concatenation of the dimensions of \c left and \c right.
template <class T>
Tensor<T> outer(const Tensor<T>& left, const Tensor<T>& right) {
    long nd = left.ndim() + right.ndim();
    TENSOR_ASSERT(nd <= TENSOR_MAXDIM,"too many dimensions in result",
                  nd,0);
    long d[TENSOR_MAXDIM];
    for (long i=0; i<left.ndim(); ++i) d[i] = left.dim(i);
    for (long i=0; i<right.ndim(); ++i) d[i+left.ndim()] = right.dim(i);
    Tensor<T> result(nd,d,false);
    T* ptr = result.ptr();  // filled sequentially; result is contiguous

    TensorIterator<T> iter=right.unary_iterator(1,false,true);
    for (TensorIterator<T> p=left.unary_iterator(); p!=left.end(); ++p) {
        T val1 = *p;
        // Cannot reorder dimensions, but can fuse contiguous dimensions
        for (iter.reset(); iter._p0; ++iter) {
            long dimj = iter.dimj;
            T* _p0 = iter._p0;
            long Tstride = iter._s0;
            for (long _j=0; _j<dimj; ++_j, _p0+=Tstride) {
                *ptr++ = val1 * (*_p0);
            }
        }
    }

    return result;
}


/// Inner product ... result(i,j,...,p,q,...) = sum(z) left(i,j,...,z)*right(z,p,q,...)

/// \ingroup tensor
/// By default it contracts the last dimension of the left tensor and
/// the first dimension of the right tensor. These defaults can be
/// changed by specifying \c k0 and \c k1 , the index to contract in
/// the left and right side tensors, respectively. The defaults
/// correspond to (\c k0=-1 and \c k1=0 ).
template <class T, class Q>
Tensor<TENSOR_RESULT_TYPE(T,Q)> inner(const Tensor<T>& left, const Tensor<Q>& right,
                                      long k0=-1, long k1=0) {
    // Negative axis indices count backward from the last dimension
    if (k0 < 0) k0 += left.ndim();
    if (k1 < 0) k1 += right.ndim();
    long nd = left.ndim() + right.ndim() - 2;
    TENSOR_ASSERT(nd!=0, "result is a scalar but cannot return one ... use dot",
                  nd, &left);


    TENSOR_ASSERT(left.dim(k0) == right.dim(k1),"common index must be same length",
                  right.dim(k1), &left);

    TENSOR_ASSERT(nd > 0 && nd <= TENSOR_MAXDIM,
                  "invalid number of dimensions in the result", nd,0);

    long d[TENSOR_MAXDIM];

    // Result dimensions: left's dims with k0 removed, followed by
    // right's dims with k1 removed.
    long base=0;
    for (long i=0; i<k0; ++i) d[i] = left.dim(i);
    for (long i=k0+1; i<left.ndim(); ++i) d[i-1] = left.dim(i);
    base = left.ndim()-1;
    for (long i=0; i<k1; ++i) d[i+base] = right.dim(i);
    base--;
    for (long i=k1+1; i<right.ndim(); ++i) d[i+base] = right.dim(i);

    // Result is zero-initialized since inner_result accumulates into it
    Tensor<TENSOR_RESULT_TYPE(T,Q)> result(nd,d);

    inner_result(left,right,k0,k1,result);

    return result;
}

/// Accumulate inner product into user provided, contiguous, correctly sized result tensor

/// \ingroup tensor
/// This routine may be used to optimize away the tensor constructor
/// of the result tensor in inner loops when the result tensor may be
/// reused or accumulated into. If the user calls this routine
/// directly very little checking is done since it is intended as an
/// optimization for small tensors. As far as the result goes, the
/// caller is completely responsible for providing a contiguous tensor
/// that has the correct dimensions and is appropriately initialized.
/// The inner product is accumulated into result.
template <class T, class Q>
void inner_result(const Tensor<T>& left, const Tensor<Q>& right,
                  long k0, long k1, Tensor< TENSOR_RESULT_TYPE(T,Q) >& result) {

    typedef TENSOR_RESULT_TYPE(T,Q) resultT;
    // Need to include explicit optimizations for common special cases
    // E.g., contiguous, matrix-matrix, and 3d-tensor*matrix

    resultT* ptr = result.ptr();

    if (k0 < 0) k0 += left.ndim();
    if (k1 < 0) k1 += right.ndim();

    if (left.iscontiguous() && right.iscontiguous()) {
        // Four fast paths dispatch on whether the contracted index is the
        // first or last dimension of each operand; the non-contracted
        // indices are collapsed into a single effective i (or j) index.
        if (k0==0 && k1==0) {
            // c[i,j] = a[k,i]*b[k,j] ... collapsing extra indices to i & j
            long dimk = left.dim(k0);
            long dimj = right.stride(0);
            long dimi = left.stride(0);
            mTxm(dimi,dimj,dimk,ptr,left.ptr(),right.ptr());
            return;
        }
        else if (k0==(left.ndim()-1) && k1==(right.ndim()-1)) {
            // c[i,j] = a[i,k]*b[j,k] ... collapsing extra indices to i & j
            long dimk = left.dim(k0);
            long dimi = left.size()/dimk;
            long dimj = right.size()/dimk;
            mxmT(dimi,dimj,dimk,ptr,left.ptr(),right.ptr());
            return;
        }
        else if (k0==0 && k1==(right.ndim()-1)) {
            // c[i,j] = a[k,i]*b[j,k] ... collapsing extra indices to i & j
            long dimk = left.dim(k0);
            long dimi = left.stride(0);
            long dimj = right.size()/dimk;
            mTxmT(dimi,dimj,dimk,ptr,left.ptr(),right.ptr());
            return;
        }
        else if (k0==(left.ndim()-1) && k1==0) {
            // c[i,j] = a[i,k]*b[k,j] ... collapsing extra indices to i & j
            long dimk = left.dim(k0);
            long dimi = left.size()/dimk;
            long dimj = right.stride(0);
            mxm(dimi,dimj,dimk,ptr,left.ptr(),right.ptr());
            return;
        }
    }

    // General (non-contiguous or interior-axis) case: iterate rows along
    // the contracted dimension of each operand and accumulate dot products.
    long dimj = left.dim(k0);
    TensorIterator<Q> iter1=right.unary_iterator(1,false,false,k1);

    for (TensorIterator<T> iter0=left.unary_iterator(1,false,false,k0);
            iter0._p0; ++iter0) {
        T* MADNESS_RESTRICT xp0 = iter0._p0;
        long s0 = iter0._s0;
        for (iter1.reset(); iter1._p0; ++iter1) {
            T* MADNESS_RESTRICT p0 = xp0;
            Q* MADNESS_RESTRICT p1 = iter1._p0;
            long s1 = iter1._s0;
            resultT sum = 0;
            for (long j=0; j<dimj; ++j,p0+=s0,p1+=s1) {
                sum += (*p0) * (*p1);
            }
            *ptr++ += sum;
        }
    }
}

/// Transform all dimensions of the tensor t by the matrix c

/// \ingroup tensor
/// Often used to transform all dimensions from one basis to another
/// \code
/// result(i,j,k...) <-- sum(i',j', k',...) t(i',j',k',...) c(i',i) c(j',j) c(k',k) ...
/// \endcode
/// The input dimensions of \c t must all be the same and agree with
/// the first dimension of \c c . The dimensions of \c c may differ in
/// size. If the dimensions of \c c are the same, and the operation
/// is being performed repeatedly, then you might consider calling \c
/// fast_transform instead which enables additional optimizations and
/// can eliminate all constructor overhead and improve cache locality.
2348 /// 2349 template <class T, class Q> 2350 Tensor<TENSOR_RESULT_TYPE(T,Q)> transform(const Tensor<T>& t, const Tensor<Q>& c) { 2351 typedef TENSOR_RESULT_TYPE(T,Q) resultT; 2352 TENSOR_ASSERT(c.ndim() == 2,"second argument must be a matrix",c.ndim(),&c); 2353 if (c.dim(0)==c.dim(1) && t.iscontiguous() && c.iscontiguous()) { 2354 Tensor<resultT> result(t.ndim(),t.dims(),false); 2355 Tensor<resultT> work(t.ndim(),t.dims(),false); 2356 return fast_transform(t, c, result, work); 2357 } 2358 else { 2359 Tensor<resultT> result = t; 2360 for (long i=0; i<t.ndim(); ++i) { 2361 result = inner(result,c,0,0); 2362 } 2363 return result; 2364 } 2365 } 2366 2367 /// Transform all dimensions of the tensor t by distinct matrices c 2368 2369 /// \ingroup tensor 2370 /// Similar to transform but each dimension is transformed with a 2371 /// distinct matrix. 2372 /// \code 2373 /// result(i,j,k...) <-- sum(i',j', k',...) t(i',j',k',...) c[0](i',i) c[1](j',j) c[2](k',k) ... 2374 /// \endcode 2375 /// The first dimension of the matrices c must match the corresponding 2376 /// dimension of t. 2377 template <class T, class Q> 2378 Tensor<TENSOR_RESULT_TYPE(T,Q)> general_transform(const Tensor<T>& t, const Tensor<Q> c[]) { 2379 typedef TENSOR_RESULT_TYPE(T,Q) resultT; 2380 Tensor<resultT> result = t; 2381 for (long i=0; i<t.ndim(); ++i) { 2382 result = inner(result,c[i],0,0); 2383 } 2384 return result; 2385 } 2386 2387 /// Restricted but heavily optimized form of transform() 2388 2389 /// \ingroup tensor 2390 /// Both dimensions of \c c must be the same and match all dimensions 2391 /// of the input tensor \c t. All tensors must be contiguous. 2392 /// 2393 /// Performs the same operation as \c transform but it requires 2394 /// that the caller pass in workspace and a preallocated result, 2395 /// hoping that that both can be reused. 
If the result and 2396 /// workspace are reused between calls, then no tensor 2397 /// constructors need be called and cache locality should be 2398 /// improved. By passing in the workspace, this routine is kept 2399 /// thread safe. 2400 /// 2401 /// The input, result and workspace tensors must be distinct. 2402 /// 2403 /// All input tensors must be contiguous and fastest execution 2404 /// will result if all dimensions are approriately aligned and 2405 /// multiples of the underlying vector length. The workspace and 2406 /// the result must be of the same size as the input \c t . The 2407 /// result tensor need not be initialized before calling 2408 /// fast_transform. 2409 /// 2410 /// \code 2411 /// result(i,j,k,...) <-- sum(i',j', k',...) t(i',j',k',...) c(i',i) c(j',j) c(k',k) ... 2412 /// \endcode 2413 /// 2414 /// The input dimensions of \c t must all be the same . 2415 template <class T, class Q> 2416 Tensor< TENSOR_RESULT_TYPE(T,Q) >& fast_transform(const Tensor<T>& t, const Tensor<Q>& c, Tensor< TENSOR_RESULT_TYPE(T,Q) >& result, 2417 Tensor< TENSOR_RESULT_TYPE(T,Q) >& workspace) { 2418 typedef TENSOR_RESULT_TYPE(T,Q) resultT; 2419 const Q *pc=c.ptr(); 2420 resultT *t0=workspace.ptr(), *t1=result.ptr(); 2421 if (t.ndim()&1) { 2422 t0 = result.ptr(); 2423 t1 = workspace.ptr(); 2424 } 2425 2426 long dimj = c.dim(1); 2427 long dimi = 1; 2428 for (int n=1; n<t.ndim(); ++n) dimi *= dimj; 2429 2430 #if HAVE_IBMBGQ 2431 long nij = dimi*dimj; 2432 if (IS_UNALIGNED(pc) || IS_UNALIGNED(t0) || IS_UNALIGNED(t1)) { 2433 for (long i=0; i<nij; ++i) t0[i] = 0.0; 2434 mTxm(dimi, dimj, dimj, t0, t.ptr(), pc); 2435 for (int n=1; n<t.ndim(); ++n) { 2436 for (long i=0; i<nij; ++i) t1[i] = 0.0; 2437 mTxm(dimi, dimj, dimj, t1, t0, pc); 2438 std::swap(t0,t1); 2439 } 2440 } 2441 else { 2442 mTxmq_padding(dimi, dimj, dimj, dimj, t0, t.ptr(), pc); 2443 for (int n=1; n<t.ndim(); ++n) { 2444 mTxmq_padding(dimi, dimj, dimj, dimj, t1, t0, pc); 2445 std::swap(t0,t1); 2446 } 2447 } 
2448 #else 2449 // Now assume no restriction on the use of mtxmq 2450 mTxmq(dimi, dimj, dimj, t0, t.ptr(), pc); 2451 for (int n=1; n<t.ndim(); ++n) { 2452 mTxmq(dimi, dimj, dimj, t1, t0, pc); 2453 std::swap(t0,t1); 2454 } 2455 #endif 2456 2457 return result; 2458 } 2459 2460 /// Return a new tensor holding the absolute value of each element of t 2461 2462 /// \ingroup tensor 2463 template <class T> 2464 Tensor< typename Tensor<T>::scalar_type > abs(const Tensor<T>& t) { 2465 typedef typename Tensor<T>::scalar_type scalar_type; 2466 Tensor<scalar_type> result(t.ndim(),t.dims(),false); 2467 BINARY_OPTIMIZED_ITERATOR(scalar_type,result,const T,t,*_p0 = std::abs(*_p1)); 2468 return result; 2469 } 2470 2471 /// Return a new tensor holding the argument of each element of t (complex types only) 2472 2473 /// \ingroup tensor 2474 template <class T> 2475 Tensor< typename Tensor<T>::scalar_type > arg(const Tensor<T>& t) { 2476 typedef typename Tensor<T>::scalar_type scalar_type; 2477 Tensor<scalar_type> result(t.ndim(),t.dims(),false); 2478 BINARY_OPTIMIZED_ITERATOR(scalar_type,result,T,t,*_p0 = std::arg(*_p1)); 2479 return result; 2480 } 2481 2482 /// Return a new tensor holding the real part of each element of t (complex types only) 2483 2484 /// \ingroup tensor 2485 template <class T> 2486 Tensor< typename Tensor<T>::scalar_type > real(const Tensor<T>& t) { 2487 typedef typename Tensor<T>::scalar_type scalar_type; 2488 Tensor<scalar_type> result(t.ndim(),t.dims(),false); 2489 BINARY_OPTIMIZED_ITERATOR(scalar_type,result,const T,t,*_p0 = std::real(*_p1)); 2490 return result; 2491 } 2492 2493 /// Return a new tensor holding the imaginary part of each element of t (complex types only) 2494 2495 /// \ingroup tensor 2496 template <class T> 2497 Tensor< typename Tensor<T>::scalar_type > imag(const Tensor<T>& t) { 2498 typedef typename Tensor<T>::scalar_type scalar_type; 2499 Tensor<scalar_type> result(t.ndim(),t.dims(),false); 2500 
BINARY_OPTIMIZED_ITERATOR(scalar_type,result,const T,t,*_p0 = std::imag(*_p1)); 2501 return result; 2502 } 2503 2504 /// Returns a new deep copy of the complex conjugate of the input tensor (complex types only) 2505 2506 /// \ingroup tensor 2507 template <class T> 2508 Tensor<T> conj(const Tensor<T>& t) { 2509 Tensor<T> result(t.ndim(),t.dims(),false); 2510 BINARY_OPTIMIZED_ITERATOR(T,result,const T,t,*_p0 = std::conj(*_p1)); 2511 return result; 2512 } 2513 } 2514 2515 #undef TENSOR_SHARED_PTR 2516 2517 #endif // MADNESS_TENSOR_TENSOR_H__INCLUDED 2518