/*
  This file is part of MADNESS.

  Copyright (C) 2007,2010 Oak Ridge National Laboratory

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  For more information please contact:

  Robert J. Harrison
  Oak Ridge National Laboratory
  One Bethel Valley Road
  P.O. Box 2008, MS-6367

  email: harrisonrj@ornl.gov
  tel:   865-241-3937
  fax:   865-572-0680
*/

#ifndef MADNESS_TENSOR_TENSOR_H__INCLUDED
#define MADNESS_TENSOR_TENSOR_H__INCLUDED

#include <madness/madness_config.h>
#include <madness/misc/ran.h>
#include <madness/world/posixmem.h>

#include <memory>
#include <complex>
#include <vector>
#include <cmath>
#include <cstdlib>
#include <cstddef>

#include <madness/world/archive.h>
// #include <madness/world/print.h>
//
// typedef std::complex<float> float_complex;
// typedef std::complex<double> double_complex;
//
// // These probably have to be included in this order
// #include <madness/tensor/tensor_macros.h>
// #include <madness/tensor/type_data.h>
// #include <madness/tensor/slice.h>
// #include <madness/tensor/vector_factory.h>
#include <madness/tensor/basetensor.h>
#include <madness/tensor/aligned.h>
#include <madness/tensor/mxm.h>
#include <madness/tensor/tensorexcept.h>
#include <madness/tensor/tensoriter.h>

#ifdef USE_GENTENSOR
#define HAVE_GENTENSOR 1
#else
#define HAVE_GENTENSOR 0
#endif


/*!
  \file tensor.h
  \brief Defines and implements most of Tensor
  \ingroup tensor
  \addtogroup tensor

  \par Introduction

  A tensor is a multi-dimensional array and does not incorporate any concepts
  of covariance and contravariance.

  When a new tensor is created, the underlying data is also allocated.
  E.g.,
  \code
  Tensor<double> a(3,4,5);
  \endcode
  creates a new 3-dimensional tensor and allocates a contiguous
  block of 60 doubles which are initialized to zero.  The dimensions
  (numbered from the left starting at 0) are in C or row-major
  order.  Thus, for the tensor \c a , the stride between successive
  elements of the right-most dimension is 1.  For the middle
  dimension it is 5.  For the left-most dimension it is 20.  Thus,
  the loops
  \code
  for (i=0; i<3; ++i)
      for (j=0; j<4; ++j)
          for (k=0; k<5; ++k)
              a(i,j,k) = ...
  \endcode
  will go sequentially (and thus efficiently) through memory.
  If the dimensions have been reordered (e.g., with \c swapdim()
  or \c map() ), or if the tensor is actually a slice of another
  tensor, then the layout in memory may be more complex and
  may not reflect a contiguous block of memory.
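
  E.g. (an illustrative sketch; \c swapdim() returns a new view with
  permuted dimensions and strides, leaving the data in place):
  \code
  Tensor<double> a(3,4,5);
  Tensor<double> b = a.swapdim(0,2); // a 5x4x3 view of a's data
  // The right-most dimension of b now has stride 20, so iterating
  // over it does not walk sequentially through memory.
  \endcode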

  Multiple tensors may be used to provide multiple identical or
  distinct views of the same data.  E.g., in the following
  \code
  Tensor<double> a(2,3);  // A new tensor initialized to zero
  Tensor<double> b = a;
  \endcode
  \c a and \c b provide identical views of the same data, thus
  \code
  b(1,2) = 99;
  cout << a(1,2) << endl;  // Outputs 99
  cout << b(1,2) << endl;  // Outputs 99
  \endcode

  \par Shallow copy and assignment

  It is important to appreciate that the views and the data are
  quite independent.  In particular, the default copy constructor
  and assignment operations only copy the tensor (the view) and not
  the data --- <em> i.e., the copy constructor and assignment operations
  only take shallow copies</em>.  This is for both consistency and
  efficiency.  Thus, assigning one tensor to another generates another
  view of the same data, replacing any previous view and not moving
  or copying any of the data.
  E.g.,
  \code
  Tensor<double> a(2,3);   // A new tensor initialized to zero
  Tensor<double> c(3,3,3); // Another new tensor
  Tensor<double> b = a;    // b is a view of the same data as a
  a = c;                   // a is now a view of c's data
  b = c;                   // b is now also a view of c's data and the
  // data allocated originally for a is freed
  \endcode
  The above example also illustrates how reference counting is used
  to keep track of the underlying data.  Once there are no views
  of the data, it is automatically freed.

  There are only two ways to actually copy the underlying data.  A
  new, complete, and contiguous copy of a tensor and its data may be
  generated with the \c copy() function.  Or, to copy data from one tensor
  into the data viewed by another tensor, you must use a Slice.
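
  E.g. (an illustrative sketch; \c copy() is declared later in this file):
  \code
  Tensor<double> a(3,4);
  Tensor<double> b = copy(a); // b owns a fresh, contiguous copy of a's data
  b(___) = a;                 // copies a's data into b's existing storage
  \endcode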

  \par Indexing

  One dimensional tensors (i.e., vectors) may be indexed using
  either square brackets (e.g., \c v[i] ) or round brackets (e.g.,
  \c v(i) ).  All higher-dimensional tensors must use only round
  brackets (e.g., \c t(i,j,k) ).  This is due to C++'s restriction
  that the indexing operator (\c [] ) can only have one argument.
  The indexing operation should generate efficient code.

  For the sake of efficiency, no bounds checking is performed by
  default by most single element indexing operations.  Checking can
  be enabled at compile time by defining \c -DTENSOR_BOUNDS_CHECKING for
  application files including \c tensor.h.  The MADNESS configure script
  has the option \c --enable-tensor-bound-checking to define the macro
  in \c madness_config.h .  The general indexing
  operation that takes a \c std::vector<long> index and all slicing
  operations always perform bounds checking.  To make indexing with
  checking a bit easier, a factory function has been provided for
  vectors ... but note you need to explicitly use longs as the
  index.
  \code
  Tensor<long> a(7,7,7);
  a(3,4,5) += 1;                    // OK ... adds 1 to element (3,4,5)
  a(3,4,9) += 1;                    // BAD ... undetected out-of-bounds access
  a(vector_factory(3L,4L,9L)) += 1; // OK ... out-of-bounds access will
  // be detected at runtime.
  \endcode

  \par Slicing

  Slices generate sub-tensors --- i.e., views of patches of the
  data.  E.g., to refer to all but the first and last elements in
  each dimension of a matrix use
  \code
  a(Slice(1,-2),Slice(1,-2))
  \endcode
  Or to view every other element in each dimension
  \code
  a(Slice(0,-1,2),Slice(0,-1,2))
  \endcode
  A slice or patch of a
  tensor behaves exactly like a tensor \em except for assignment.
  When a slice is assigned to, the data is copied with the
  requirement that the source and destination agree in size and
  shape (i.e., they conform).  Thus, to copy all of the data
  from b into a,
  \code
  Tensor<double> a(3,4), b(3,4);
  a = 1;                              // Set all elements of a to 1
  b = 2;                              // Set all elements of b to 2
  a(Slice(0,-1,1),Slice(0,-1,1)) = b; // Copy all data from b to a
  a(_,_) = b(_,_);                    // Copy all data from b to a
  a(___) = b(___);                    // Copy all data from b to a
  a(Slice(1,2),Slice(1,2)) = b;       // Error, do not conform
  \endcode
  Special slice values \c _ , \c _reverse , and \c ___ have
  been defined to refer to all elements in a dimension, all
  elements in a dimension but reversed, and all elements in all
  dimensions, respectively.
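
  E.g. (illustrative):
  \code
  Tensor<double> a(4,4);
  a(_, _) = 1.0;                           // assign to every element via a view
  Tensor<double> r = copy(a(_reverse, _)); // deep copy with the rows reversed
  \endcode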

  \par Iteration and algorithms

  See tensor_macros.h for documentation on the easiest mechanisms for iteration over
  elements of tensors and tips for optimization.  See \c TensorIterator for
  the most general form of iteration.
*/


#ifndef HAVE_STD_ABS_LONG
#ifndef HAVE_STD_LABS
namespace std {
    static long abs(long a) {
        return a>=0 ? a : -a;
    }
}
#else
namespace std {
    static long abs(long a) {
        return std::labs(a);
    }
}
#endif
#endif


namespace madness {
#define IS_ODD(n) ((n)&0x1)
#define IS_UNALIGNED(p) (((unsigned long)(p))&0x7)


    /// For real types return the value; for complex types return the conjugate
    template <typename Q, bool iscomplex>
    struct conditional_conj_struct {
        static Q op(const Q& coeff) {
            return coeff;
        }
    };

    /// For real types return the value; for complex types return the conjugate
    template <typename Q>
    struct conditional_conj_struct<Q,true> {
        static Q op(const Q& coeff) {
            return conj(coeff);
        }
    };

    /// For real types return the value; for complex types return the conjugate
    template <typename Q>
    Q conditional_conj(const Q& coeff) {
        return conditional_conj_struct<Q,TensorTypeData<Q>::iscomplex>::op(coeff);
    }

    namespace detail {
        template <typename T> T mynorm(T t) {
            return t*t;
        }

        template <typename T> T mynorm(std::complex<T> t) {
            return std::norm(t);
        }
    }
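
    // E.g. (illustrative): detail::mynorm(2.0) == 4.0, while
    // detail::mynorm(std::complex<double>(3,4)) == 25.0; both return the
    // squared magnitude in the element's scalar type.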

    template <class T> class SliceTensor;


    /// low rank representations of tensors (see gentensor.h)
    enum TensorType {TT_NONE, TT_FULL, TT_2D, TT_TENSORTRAIN};

    static
    inline
    std::ostream& operator << (std::ostream& s, const TensorType& tt) {
        std::string str="confused tensor type";
        if (tt==TT_FULL) str="full rank tensor";
        if (tt==TT_2D) str="low rank tensor 2-way";
        if (tt==TT_TENSORTRAIN) str="tensor train";
        if (tt==TT_NONE) str="no tensor type specified";
        s << str.c_str();
        return s;
    }

    //#define TENSOR_USE_SHARED_ALIGNED_ARRAY
#ifdef TENSOR_USE_SHARED_ALIGNED_ARRAY
#define TENSOR_SHARED_PTR detail::SharedAlignedArray
    // This code has been tested and seems to work correctly on all
    // tests and moldft.  However, initial testing indicated only a
    // hard-to-measure improvement in thread scaling (from 20 to 60
    // threads on cn-mem) and no change in the modest-thread-count (20)
    // execution time with tbballoc.  Hence we are not presently using
    // it but will test more in the future with different allocators.
    namespace detail {
        // Minimal ref-counted array with data+counter in one alloc
        template <typename T> class SharedAlignedArray {
            T* volatile p;
            AtomicInt* cnt;
            void dec() {if (p && ((*cnt)-- == 1)) {free(p); p = 0;}}
            void inc() {if (p) (*cnt)++;}
        public:
            SharedAlignedArray() : p(0), cnt(0) {}
            // Copying a handle must bump the reference count, so the
            // compiler-generated copy constructor cannot be used here
            SharedAlignedArray(const SharedAlignedArray<T>& other)
                : p(other.p), cnt(other.cnt) {inc();}
            T* allocate(std::size_t size, unsigned int alignment) {
                std::size_t offset = (size*sizeof(T)-1)/sizeof(AtomicInt) + 1; // Where the counter will be
                std::size_t nbyte = (offset+1)*sizeof(AtomicInt);
                if (posix_memalign((void **) &p, alignment, nbyte)) throw 1;
                cnt = (AtomicInt*)(p) + offset;
                *cnt = 1;
                return p;
            }
            SharedAlignedArray<T>& operator=(const SharedAlignedArray<T>& other) {
                if (this != &other) {dec(); p = other.p; cnt = other.cnt; inc();}
                return *this;
            }
            void reset() {dec(); p = 0;}
            ~SharedAlignedArray() {dec();}
        };
    }
#else
#define TENSOR_SHARED_PTR std::shared_ptr
#endif

    /// A tensor is a multidimensional array

    /// \ingroup tensor
    template <class T> class Tensor : public BaseTensor {
        template <class U> friend class SliceTensor;

    protected:
        T* MADNESS_RESTRICT _p;
        TENSOR_SHARED_PTR <T> _shptr;

        void allocate(long nd, const long d[], bool dozero) {
            _id = TensorTypeData<T>::id;
            if (nd < 0) {
                _p = 0;
                _shptr.reset();
                _size = 0;
                _ndim = -1;
                return;
            }

            TENSOR_ASSERT(nd>0 && nd <= TENSOR_MAXDIM,"invalid ndim in new tensor", nd, 0);
            // sanity check ... 2GB in doubles
            for (int i=0; i<nd; ++i) {
                TENSOR_ASSERT(d[i]>=0 && d[i]<268435456, "invalid dimension size in new tensor",d[i],0);
            }
            set_dims_and_size(nd, d);
            if (_size) {
                TENSOR_ASSERT(_size>=0 && _size<268435456, "invalid size in new tensor",_size,0);
                try {
#if HAVE_IBMBGP
#define TENSOR_ALIGNMENT 16
#elif HAVE_IBMBGQ
#define TENSOR_ALIGNMENT 32
#elif MADNESS_HAVE_AVX2
/* 32B alignment is best for performance according to
 * http://www.nas.nasa.gov/hecc/support/kb/haswell-processors_492.html */
#define TENSOR_ALIGNMENT 32
#elif MADNESS_HAVE_AVX512
/* One can infer from the AVX2 case that 64B alignment helps with 512b SIMD. */
#define TENSOR_ALIGNMENT 64
#else
// Typical cache line size
#define TENSOR_ALIGNMENT 64
#endif

#ifdef TENSOR_USE_SHARED_ALIGNED_ARRAY
                    _p = _shptr.allocate(_size, TENSOR_ALIGNMENT);
#elif defined WORLD_GATHER_MEM_STATS
                    _p = new T[_size];
                    // new[] must be matched by delete[] in the deleter
                    _shptr = std::shared_ptr<T>(_p, [](T* q) {delete [] q;});
#else
                    if (posix_memalign((void **) &_p, TENSOR_ALIGNMENT, sizeof(T)*_size)) throw 1;
                    _shptr.reset(_p, &free);
#endif
                }
                catch (...) {
                    std::printf("new failed nd=%ld type=%ld size=%ld\n", nd, id(), _size);
                    std::printf("  %ld %ld %ld %ld %ld %ld\n",
                                d[0], d[1], d[2], d[3], d[4], d[5]);
                    TENSOR_EXCEPTION("new failed",_size,this);
                }
                //std::printf("allocated %p [%ld]  %ld\n", _p, size, p.use_count());
                if (dozero) {
                    //T zero = 0; for (long i=0; i<_size; ++i) _p[i] = zero;
                    // or
#ifdef HAVE_MEMSET
                    memset((void *) _p, 0, _size*sizeof(T));
#else
                    aligned_zero(_size, _p);
#endif
                }
            }
            else {
                _p = 0;
                _shptr.reset();
            }
        }

        // Free memory and restore default constructor state
        void deallocate() {
            _p = 0;
            _shptr.reset();
            _size = 0;
            _ndim = -1;
        }

    public:
        /// C++ typename of this tensor.
        typedef T type;

        /// C++ typename of the real type associated with a complex type.
        typedef typename TensorTypeData<T>::scalar_type scalar_type;

        /// C++ typename of the floating point type associated with the scalar real type
        typedef typename TensorTypeData<T>::float_scalar_type float_scalar_type;

        /// Default constructor does not allocate any data and sets ndim=-1, size=0, _p=0, and id.
        Tensor() : _p(0) {
            _id = TensorTypeData<T>::id;
        }

        /// Copy constructor is shallow (same as assignment)

        /// \em Caveat \em emptor: The shallow copy constructor has many virtues but
        /// enables you to violate constness with simple code such as
        /// \code
        /// const Tensor<double> a(5);
        /// Tensor<double> b(a);
        /// b[1] = 3; // a[1] is now also 3
        /// \endcode
        Tensor(const Tensor<T>& t) {
            _id = TensorTypeData<T>::id;
            *this = t;
        }

        /// Assignment is shallow (same as copy constructor)

        /// \em Caveat \em emptor: The shallow assignment has many virtues but
        /// enables you to violate constness with simple code such as
        /// \code
        /// const Tensor<double> a(5);
        /// Tensor<double> b;
        /// b = a;
        /// b[1] = 3; // a[1] is now also 3
        /// \endcode
        Tensor<T>& operator=(const Tensor<T>& t) {
            if (this != &t) {
                _p = t._p;
                _shptr = t._shptr;
                _size = t._size;
                _ndim = t._ndim;
                for (int i=0; i<TENSOR_MAXDIM; ++i) {
                    _dim[i] = t._dim[i];
                    _stride[i] = t._stride[i];
                }
            }
            return *this;
        }


        /// Type conversion makes a deep copy
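        ///
        /// E.g. (an illustrative sketch):
        /// \code
        /// Tensor<double> a(3,3);
        /// Tensor<float>  b = Tensor<float>(a); // deep copy, converting each element to float
        /// \endcode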
        template <class Q> operator Tensor<Q>() const { // type conv => deep copy
            Tensor<Q> result = Tensor<Q>(this->_ndim,this->_dim,false);
            BINARY_OPTIMIZED_ITERATOR(Q, result, const T, (*this), *_p0 = (Q)(*_p1));
            return result;
        }


        /// Create and zero new 1-d tensor

        /// @param[in] d0 Size of dimension 0
        explicit Tensor(long d0) : _p(0) {
            _dim[0] = d0;
            allocate(1, _dim, true);
        }

        /// Create and zero new 2-d tensor

        /// @param[in] d0 Size of dimension 0
        /// @param[in] d1 Size of dimension 1
        explicit Tensor(long d0, long d1) : _p(0) {
            _dim[0] = d0; _dim[1] = d1;
            allocate(2, _dim, true);
        }

        /// Create and zero new 3-d tensor

        /// @param[in] d0 Size of dimension 0
        /// @param[in] d1 Size of dimension 1
        /// @param[in] d2 Size of dimension 2
        explicit Tensor(long d0, long d1, long d2) : _p(0) {
            _dim[0] = d0; _dim[1] = d1; _dim[2] = d2;
            allocate(3, _dim, true);
        }

        /// Create and zero new 4-d tensor

        /// @param[in] d0 Size of dimension 0
        /// @param[in] d1 Size of dimension 1
        /// @param[in] d2 Size of dimension 2
        /// @param[in] d3 Size of dimension 3
        explicit Tensor(long d0, long d1, long d2, long d3) : _p(0) {
            _dim[0] = d0; _dim[1] = d1; _dim[2] = d2; _dim[3] = d3;
            allocate(4, _dim, true);
        }

        /// Create and zero new 5-d tensor

        /// @param[in] d0 Size of dimension 0
        /// @param[in] d1 Size of dimension 1
        /// @param[in] d2 Size of dimension 2
        /// @param[in] d3 Size of dimension 3
        /// @param[in] d4 Size of dimension 4
        explicit Tensor(long d0, long d1, long d2, long d3, long d4) : _p(0) {
            _dim[0] = d0; _dim[1] = d1; _dim[2] = d2; _dim[3] = d3; _dim[4] = d4;
            allocate(5, _dim, true);
        }

        /// Create and zero new 6-d tensor

        /// @param[in] d0 Size of dimension 0
        /// @param[in] d1 Size of dimension 1
        /// @param[in] d2 Size of dimension 2
        /// @param[in] d3 Size of dimension 3
        /// @param[in] d4 Size of dimension 4
        /// @param[in] d5 Size of dimension 5
        explicit Tensor(long d0, long d1, long d2, long d3, long d4, long d5) : _p(0) {
            _dim[0] = d0; _dim[1] = d1; _dim[2] = d2; _dim[3] = d3; _dim[4] = d4; _dim[5] = d5;
            allocate(6, _dim, true);
        }

        /// Create and optionally zero new n-d tensor. This is the most general constructor.

        /// @param[in] d Vector containing size of each dimension; the number of dimensions is inferred from the vector size.
        /// @param[in] dozero If true (default) the tensor is initialized to zero
        explicit Tensor(const std::vector<long>& d, bool dozero=true) : _p(0) {
            allocate(d.size(), d.size() ? &(d[0]) : 0, dozero);
        }

        /// Politically incorrect general constructor.

        /// @param[in] nd Number of dimensions
        /// @param[in] d Size of each dimension
        /// @param[in] dozero If true (default) the tensor is initialized to zero
        explicit Tensor(long nd, const long d[], bool dozero=true) : _p(0) {
            allocate(nd,d,dozero);
        }

        /// Inplace fill tensor with scalar

        /// @param[in] x Value used to fill tensor via assignment
        /// @return %Reference to this tensor
        Tensor<T>& operator=(T x) {
            UNARY_OPTIMIZED_ITERATOR(T,(*this),*_p0 = x);
            return *this;
        }

        /// Inplace fill with a scalar (legacy name)

        /// @param[in] x Value used to fill tensor via assignment
        /// @return %Reference to this tensor
        Tensor<T>& fill(T x) {
            *this = x;
            return *this;
        }

        /// Inplace addition of two tensors

        /// @param[in] t Conforming tensor to be added in-place to this tensor
        /// @return %Reference to this tensor
        template <typename Q>
        Tensor<T>& operator+=(const Tensor<Q>& t) {
            BINARY_OPTIMIZED_ITERATOR(T, (*this), const T, t, *_p0 += *_p1);
            return *this;
        }

        /// Inplace subtraction of two tensors

        /// @param[in] t Conforming tensor to be subtracted in-place from this tensor
        /// @return %Reference to this tensor
        template <typename Q>
        Tensor<T>& operator-=(const Tensor<Q>& t) {
            BINARY_OPTIMIZED_ITERATOR(T, (*this), const T, t, *_p0 -= *_p1);
            return *this;
        }

        /// Addition of two tensors to produce a new tensor

        /// @param[in] t Conforming tensor to be added out-of-place to this tensor
        /// @return New tensor
        template <typename Q>
        Tensor< TENSOR_RESULT_TYPE(T,Q) > operator+(const Tensor<Q>& t) const {
            typedef TENSOR_RESULT_TYPE(T,Q) resultT;
            Tensor<resultT> result(_ndim,_dim,false);
            TERNARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), const Q, t, *_p0 = *_p1 + *_p2);
            return result;
        }

        /// Subtraction of two tensors to produce a new tensor

        /// @param[in] t Conforming tensor to be subtracted out-of-place from this tensor
        /// @return New tensor
        template <typename Q>
        Tensor< TENSOR_RESULT_TYPE(T,Q) > operator-(const Tensor<Q>& t) const {
            typedef TENSOR_RESULT_TYPE(T,Q) resultT;
            Tensor<resultT> result(_ndim,_dim,false);
            TERNARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), const Q, t, *_p0 = *_p1 - *_p2);
            return result;
        }
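
        // E.g. (illustrative): for conforming tensors a and b,
        //   Tensor<double> c = a + b; // allocates a new tensor for the sum
        //   a += b;                   // updates a's data in place, no allocation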

        /// Multiplication of tensor by a scalar of a supported type to produce a new tensor

        /// @param[in] x Scalar value
        /// @return New tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>, Tensor<TENSOR_RESULT_TYPE(T,Q)> >::type
        operator*(const Q& x) const {
            typedef TENSOR_RESULT_TYPE(T,Q) resultT;
            Tensor<resultT> result(_ndim,_dim,false);
            BINARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), *_p0 = *_p1 * x);
            return result;
        }

        /// Divide tensor by a scalar of a supported type to produce a new tensor

        /// @param[in] x Scalar value
        /// @return New tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>, Tensor<TENSOR_RESULT_TYPE(T,Q)> >::type
        operator/(const Q& x) const {
            typedef TENSOR_RESULT_TYPE(T,Q) resultT;
            Tensor<resultT> result(_ndim,_dim);
            BINARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), *_p0 = *_p1 / x);
            return result;
        }

        /// Add a scalar of the same type to all elements of a tensor producing a new tensor

        /// @param[in] x Scalar value
        /// @return New tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>, Tensor<TENSOR_RESULT_TYPE(T,Q)> >::type
        operator+(const Q& x) const {
            typedef TENSOR_RESULT_TYPE(T,Q) resultT;
            Tensor<resultT> result(_ndim,_dim);
            BINARY_OPTIMIZED_ITERATOR(resultT, result, const T, (*this), *_p0 = *_p1 + x);
            return result;
        }

        /// Subtract a scalar of the same type from all elements producing a new tensor

        /// @param[in] x Scalar value
        /// @return New tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>, Tensor<TENSOR_RESULT_TYPE(T,Q)> >::type
        operator-(const Q& x) const {
            return (*this) + (-x);
        }

        /// Unary negation producing a new tensor

        /// @return New tensor
        Tensor<T> operator-() const {
            Tensor<T> result = Tensor<T>(_ndim,_dim,false);
            BINARY_OPTIMIZED_ITERATOR(T, result, const T, (*this), *(_p0) = - (*_p1));
            return result;
        }

        /// Inplace multiplication by scalar of supported type

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>,Tensor<T>&>::type
        operator*=(const Q& x) {
            UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 *= x);
            return *this;
        }

        /// Inplace multiplication by scalar of supported type (legacy name)

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>,Tensor<T>&>::type
        scale(Q x) {
            return (*this)*=x;
        }

        /// Inplace increment by scalar of supported type

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>,Tensor<T>&>::type
        operator+=(const Q& x) {
            UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 += x);
            return *this;
        }

        /// Inplace decrement by scalar of supported type

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        template <typename Q>
        typename IsSupported<TensorTypeData<Q>,Tensor<T>&>::type
        operator-=(const Q& x) {
            UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 -= x);
            return *this;
        }


        /// Inplace complex conjugate

        /// @return %Reference to this tensor
        Tensor<T>& conj() {
            UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 = conditional_conj(*_p0));
            return *this;
        }

        /// Inplace fill with random values ( \c [0,1] for floats, \c [0,MAXSIZE] for integers)

        /// @return %Reference to this tensor
        Tensor<T>& fillrandom() {
            if (iscontiguous()) {
                madness::RandomVector<T>(size(), ptr());
            }
            else {
                UNARY_OPTIMIZED_ITERATOR(T,(*this), *_p0 = madness::RandomValue<T>());
            }
            return *this;
        }
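
        // E.g. (illustrative): Tensor<double> a(100); a.fillrandom();
        // fills a with uniformly distributed values in [0,1].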

        /// Inplace fill with the index of each element

        /// Each element is assigned its logical index according to this loop structure
        /// \code
        /// Tensor<float> t(5,6,7,...)
        /// long index=0;
        /// for (long i=0; i<_dim[0]; ++i)
        ///    for (long j=0; j<_dim[1]; ++j)
        ///       for (long k=0; k<_dim[2]; ++k)
        ///          ...
        ///          tensor(i,j,k,...) = index++
        /// \endcode
        ///
        /// @return %Reference to this tensor
        Tensor<T>& fillindex() {
            long count = 0;
            UNARY_UNOPTIMIZED_ITERATOR(T,(*this), *_p0 = count++); // Fusedim would be OK
            return *this;
        }

        /// Inplace set elements of \c *this less than \c x in absolute magnitude to zero.

        /// @param[in] x Scalar value
        /// @return %Reference to this tensor
        Tensor<T>& screen(double x) {
            T zero = 0;
            UNARY_OPTIMIZED_ITERATOR(T,(*this), if (std::abs(*_p0)<x) *_p0=zero);
            return *this;
        }


        /// Return true if bounds checking was enabled at compile time

        /// @return True if bounds checking was enabled at compile time
        static bool bounds_checking() {
#ifdef TENSOR_BOUNDS_CHECKING
            return true;
#else
            return false;
#endif
        }

        /// 1-d indexing operation using \c [] \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @return %Reference to element
        T& operator[](long i) {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"1d bounds check failed dim=0",i,this);
#endif
            return _p[i*_stride[0]];
        }

        /// 1-d indexing operation using \c [] \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @return %Reference to element
        const T& operator[](long i) const {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"1d bounds check failed dim=0",i,this);
#endif
            return _p[i*_stride[0]];
        }

        /// 1-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @return %Reference to element
        T& operator()(long i) {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"1d bounds check failed dim=0",i,this);
#endif
            return _p[i*_stride[0]];
        }

        /// 1-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @return %Reference to element
        const T& operator()(long i) const {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"1d bounds check failed dim=0",i,this);
#endif
            return _p[i*_stride[0]];
        }

        /// 2-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @return %Reference to element
        T& operator()(long i, long j) {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"2d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"2d bounds check failed dim=1",j,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]];
        }

        /// 2-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @return %Reference to element
        const T& operator()(long i, long j) const {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"2d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"2d bounds check failed dim=1",j,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]];
        }

        /// 3-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @param[in] k index for dimension 2
        /// @return %Reference to element
        T& operator()(long i, long j, long k) {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"3d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"3d bounds check failed dim=1",j,this);
            TENSOR_ASSERT(k>=0 && k<_dim[2],"3d bounds check failed dim=2",k,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]];
        }

        /// 3-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @param[in] k index for dimension 2
        /// @return %Reference to element
        const T& operator()(long i, long j, long k) const {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"3d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"3d bounds check failed dim=1",j,this);
            TENSOR_ASSERT(k>=0 && k<_dim[2],"3d bounds check failed dim=2",k,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]];
        }

        /// 4-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @param[in] k index for dimension 2
        /// @param[in] l index for dimension 3
        /// @return %Reference to element
        T& operator()(long i, long j, long k, long l) {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"4d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"4d bounds check failed dim=1",j,this);
            TENSOR_ASSERT(k>=0 && k<_dim[2],"4d bounds check failed dim=2",k,this);
            TENSOR_ASSERT(l>=0 && l<_dim[3],"4d bounds check failed dim=3",l,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+
                           l*_stride[3]];
        }

        /// 4-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @param[in] k index for dimension 2
        /// @param[in] l index for dimension 3
        /// @return %Reference to element
        const T& operator()(long i, long j, long k, long l) const {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"4d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"4d bounds check failed dim=1",j,this);
            TENSOR_ASSERT(k>=0 && k<_dim[2],"4d bounds check failed dim=2",k,this);
            TENSOR_ASSERT(l>=0 && l<_dim[3],"4d bounds check failed dim=3",l,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+
                           l*_stride[3]];
        }

        /// 5-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @param[in] k index for dimension 2
        /// @param[in] l index for dimension 3
        /// @param[in] m index for dimension 4
        /// @return %Reference to element
        T& operator()(long i, long j, long k, long l, long m) {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"5d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"5d bounds check failed dim=1",j,this);
            TENSOR_ASSERT(k>=0 && k<_dim[2],"5d bounds check failed dim=2",k,this);
            TENSOR_ASSERT(l>=0 && l<_dim[3],"5d bounds check failed dim=3",l,this);
            TENSOR_ASSERT(m>=0 && m<_dim[4],"5d bounds check failed dim=4",m,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+
                           l*_stride[3]+m*_stride[4]];
        }

        /// 5-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @param[in] k index for dimension 2
        /// @param[in] l index for dimension 3
        /// @param[in] m index for dimension 4
        /// @return %Reference to element
        const T& operator()(long i, long j, long k, long l, long m) const {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"5d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"5d bounds check failed dim=1",j,this);
            TENSOR_ASSERT(k>=0 && k<_dim[2],"5d bounds check failed dim=2",k,this);
            TENSOR_ASSERT(l>=0 && l<_dim[3],"5d bounds check failed dim=3",l,this);
            TENSOR_ASSERT(m>=0 && m<_dim[4],"5d bounds check failed dim=4",m,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+
                           l*_stride[3]+m*_stride[4]];
        }

        /// 6-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @param[in] k index for dimension 2
        /// @param[in] l index for dimension 3
        /// @param[in] m index for dimension 4
        /// @param[in] n index for dimension 5
        /// @return %Reference to element
        T& operator()(long i, long j, long k, long l, long m, long n) {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"6d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"6d bounds check failed dim=1",j,this);
            TENSOR_ASSERT(k>=0 && k<_dim[2],"6d bounds check failed dim=2",k,this);
            TENSOR_ASSERT(l>=0 && l<_dim[3],"6d bounds check failed dim=3",l,this);
            TENSOR_ASSERT(m>=0 && m<_dim[4],"6d bounds check failed dim=4",m,this);
            TENSOR_ASSERT(n>=0 && n<_dim[5],"6d bounds check failed dim=5",n,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+
                           l*_stride[3]+m*_stride[4]+n*_stride[5]];
        }

        /// 6-d indexing operation \em without bounds checking.

        /// @param[in] i index for dimension 0
        /// @param[in] j index for dimension 1
        /// @param[in] k index for dimension 2
        /// @param[in] l index for dimension 3
        /// @param[in] m index for dimension 4
        /// @param[in] n index for dimension 5
        /// @return %Reference to element
        const T& operator()(long i, long j, long k, long l, long m, long n) const {
#ifdef TENSOR_BOUNDS_CHECKING
            TENSOR_ASSERT(i>=0 && i<_dim[0],"6d bounds check failed dim=0",i,this);
            TENSOR_ASSERT(j>=0 && j<_dim[1],"6d bounds check failed dim=1",j,this);
            TENSOR_ASSERT(k>=0 && k<_dim[2],"6d bounds check failed dim=2",k,this);
            TENSOR_ASSERT(l>=0 && l<_dim[3],"6d bounds check failed dim=3",l,this);
            TENSOR_ASSERT(m>=0 && m<_dim[4],"6d bounds check failed dim=4",m,this);
            TENSOR_ASSERT(n>=0 && n<_dim[5],"6d bounds check failed dim=5",n,this);
#endif
            return _p[i*_stride[0]+j*_stride[1]+k*_stride[2]+
                           l*_stride[3]+m*_stride[4]+n*_stride[5]];
        }

        /// Politically incorrect general indexing operation \em without bounds checking.

        /// @param[in] ind Array containing index for each dimension
        /// @return %Reference to element
        T& operator()(const long ind[]) {
            long offset = 0;
            for (int d=0; d<_ndim; ++d) {
                long i = ind[d];
#ifdef TENSOR_BOUNDS_CHECKING
                TENSOR_ASSERT(i>=0 && i<_dim[d],"non-PC general indexing bounds check failed dim=",d,this);
#endif
                offset += i*_stride[d];
            }
            return _p[offset];
        }

        /// Politically incorrect general indexing operation \em without bounds checking.

        /// @param[in] ind Array containing index for each dimension
        /// @return %Reference to element
        const T& operator()(const long ind[]) const {
            long offset = 0;
            for (int d=0; d<_ndim; ++d) {
                long i = ind[d];
#ifdef TENSOR_BOUNDS_CHECKING
                TENSOR_ASSERT(i>=0 && i<_dim[d],"non-PC general indexing bounds check failed dim=",d,this);
#endif
                offset += i*_stride[d];
            }
            return _p[offset];
        }

        /// General indexing operation \em with bounds checking.

        /// @param[in] ind Vector containing index for each dimension
        /// @return %Reference to element
        T& operator()(const std::vector<long> ind) {
            TENSOR_ASSERT(ind.size()>=(unsigned int) _ndim,"invalid number of dimensions",ind.size(),this);
            long index=0;
            for (long d=0; d<_ndim; ++d) {
                TENSOR_ASSERT(ind[d]>=0 && ind[d]<_dim[d],"out-of-bounds access",ind[d],this);
                index += ind[d]*_stride[d];
            }
            return _p[index];
        }

        /// General indexing operation \em with bounds checking.

        /// @param[in] ind Vector containing index for each dimension
        /// @return %Reference to element
        const T& operator()(const std::vector<long> ind) const {
            TENSOR_ASSERT(ind.size()>=(unsigned int) _ndim,"invalid number of dimensions",ind.size(),this);
            long index=0;
            for (long d=0; d<_ndim; ++d) {
                TENSOR_ASSERT(ind[d]>=0 && ind[d]<_dim[d],"out-of-bounds access",ind[d],this);
                index += ind[d]*_stride[d];
            }
            return _p[index];
        }
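
        // E.g. (illustrative, using the vector_factory helper mentioned in
        // the file documentation above):
        //   Tensor<double> t(3,4,5);
        //   t(vector_factory(2L,3L,4L)) = 1.0; // always bounds checked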

        /// General slicing operation

        /// @param[in] s Vector containing slice for each dimension
        /// @return SliceTensor viewing patch of original tensor
        SliceTensor<T> operator()(const std::vector<Slice>& s) {
            TENSOR_ASSERT(s.size()>=(unsigned)(this->ndim()), "invalid number of dimensions",
                          this->ndim(),this);
            return SliceTensor<T>(*this,&(s[0]));
        }

        /// General slicing operation (const)

        /// @param[in] s Vector containing slice for each dimension
        /// @return Constant Tensor viewing patch of original tensor
        const Tensor<T> operator()(const std::vector<Slice>& s) const {
            TENSOR_ASSERT(s.size()>=(unsigned)(this->ndim()), "invalid number of dimensions",
                          this->ndim(),this);
            return SliceTensor<T>(*this,&(s[0]));
        }

        /// Return a 1d SliceTensor that views the specified range of the 1d Tensor

        /// @return SliceTensor viewing patch of original tensor
        SliceTensor<T> operator()(const Slice& s0) {
            TENSOR_ASSERT(this->ndim()==1,"invalid number of dimensions",
                          this->ndim(),this);
            Slice s[1] = {s0};
            return SliceTensor<T>(*this,s);
        }

        /// Return a 1d constant Tensor that views the specified range of the 1d Tensor

        /// @return Constant Tensor viewing patch of original tensor: \f$ R(*,*,\ldots) \rightarrow I(*,*,\ldots) \f$
        const Tensor<T> operator()(const Slice& s0) const {
            TENSOR_ASSERT(this->ndim()==1,"invalid number of dimensions",
                          this->ndim(),this);
            Slice s[1] = {s0};
            return SliceTensor<T>(*this,s);
        }

        /// Return a 1d SliceTensor that views the specified range of the 2d Tensor

        /// @return SliceTensor viewing patch of original tensor: \f$ R(*) \rightarrow I(i,*) \f$
        SliceTensor<T> operator()(long i, const Slice& s1) {
            TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions",
                          this->ndim(),this);
            Slice s[2] = {Slice(i,i,0),s1};
            return SliceTensor<T>(*this,s);
        }

        /// Return a 1d constant Tensor that views the specified range of the 2d Tensor

        /// @return Constant Tensor viewing patch of original tensor
        const Tensor<T> operator()(long i, const Slice& s1) const {
            TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions",
                          this->ndim(),this);
            Slice s[2] = {Slice(i,i,0),s1};
            return SliceTensor<T>(*this,s);
        }

        /// Return a 1d SliceTensor that views the specified range of the 2d Tensor

        /// @return SliceTensor viewing patch of original tensor
        SliceTensor<T> operator()(const Slice& s0, long j) {
            TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions",
                          this->ndim(),this);
            Slice s[2] = {s0,Slice(j,j,0)};
            return SliceTensor<T>(*this,s);
        }

        /// Return a 1d constant Tensor that views the specified range of the 2d Tensor

        /// @return Constant Tensor viewing patch of original tensor
        const Tensor<T> operator()(const Slice& s0, long j) const {
            TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions",
                          this->ndim(),this);
            Slice s[2] = {s0,Slice(j,j,0)};
            return SliceTensor<T>(*this,s);
        }
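
        // E.g. (illustrative): for a 2d tensor m,
        //   m(1, _) is a 1d view of row 1 and m(_, 2) is a 1d view of
        //   column 2; assigning to either (e.g., m(1, _) = 0.0) writes
        //   through to m's data.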
1138 
1139         /// Return a 2d SliceTensor that views the specified range of the 2d Tensor
1140 
1141         /// @return SliceTensor viewing patch of original tensor
1142         SliceTensor<T> operator()(const Slice& s0, const Slice& s1) {
1143             TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions",
1144                           this->ndim(),this);
1145             Slice s[2] = {s0,s1};
1146             return SliceTensor<T>(*this,s);
1147         }
1148 
1149         /// Return a 2d constant Tensor that views the specified range of the 2d Tensor
1150 
1151         /// @return Constant Tensor viewing patch of original tensor
1152         const Tensor<T> operator()(const Slice& s0, const Slice& s1) const {
1153             TENSOR_ASSERT(this->ndim()==2,"invalid number of dimensions",
1154                           this->ndim(),this);
1155             Slice s[2] = {s0,s1};
1156             return SliceTensor<T>(*this,s);
1157         }
1158 
1159         /// Return a 3d SliceTensor that views the specified range of the 3d Tensor
1160 
1161         /// @return SliceTensor viewing patch of original tensor
1162         SliceTensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2) {
1163             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1164                           this->ndim(),this);
1165             Slice s[3] = {s0,s1,s2};
1166             return SliceTensor<T>(*this,s);
1167         }
1168 
1169         /// Return a 3d constant Tensor that views the specified range of the 3d Tensor
1170 
1171         /// @return Constant Tensor viewing patch of original tensor
1172         const Tensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2) const {
1173             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1174                           this->ndim(),this);
1175             Slice s[3] = {s0,s1,s2};
1176             return SliceTensor<T>(*this,s);
1177         }
1178 
1179         /// Return a 2d SliceTensor that views the specified range of the 3d Tensor
1180 
1181         /// @return SliceTensor viewing patch of original tensor
1182         SliceTensor<T> operator()(long i, const Slice& s1, const Slice& s2) {
1183             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1184                           this->ndim(),this);
1185             Slice s[3] = {Slice(i,i,0),s1,s2};
1186             return SliceTensor<T>(*this,s);
1187         }
1188 
1189         /// Return a 2d constant Tensor that views the specified range of the 3d Tensor
1190 
1191         /// @return Constant Tensor viewing patch of original tensor
1192         const Tensor<T> operator()(long i, const Slice& s1, const Slice& s2) const {
1193             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1194                           this->ndim(),this);
1195             Slice s[3] = {Slice(i,i,0),s1,s2};
1196             return SliceTensor<T>(*this,s);
1197         }
1198 
1199         /// Return a 2d SliceTensor that views the specified range of the 3d Tensor
1200 
1201         /// @return SliceTensor viewing patch of original tensor
1202         SliceTensor<T> operator()(const Slice& s0, long j, const Slice& s2) {
1203             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1204                           this->ndim(),this);
1205             Slice s[3] = {s0,Slice(j,j,0),s2};
1206             return SliceTensor<T>(*this,s);
1207         }
1208 
1209         /// Return a 2d constant Tensor that views the specified range of the 3d Tensor
1210 
1211         /// @return Constant Tensor viewing patch of original tensor
1212         const Tensor<T> operator()(const Slice& s0, long j, const Slice& s2) const {
1213             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1214                           this->ndim(),this);
1215             Slice s[3] = {s0,Slice(j,j,0),s2};
1216             return SliceTensor<T>(*this,s);
1217         }
1218 
1219         /// Return a 2d SliceTensor that views the specified range of the 3d Tensor
1220 
1221         /// @return SliceTensor viewing patch of original tensor
1222         SliceTensor<T> operator()(const Slice& s0, const Slice& s1, long k) {
1223             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1224                           this->ndim(),this);
1225             Slice s[3] = {s0,s1,Slice(k,k,0)};
1226             return SliceTensor<T>(*this,s);
1227         }
1228 
1229         /// Return a 2d constant Tensor that views the specified range of the 3d Tensor
1230 
1231         /// @return Constant Tensor viewing patch of original tensor
1232         const Tensor<T> operator()(const Slice& s0, const Slice& s1, long k) const {
1233             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1234                           this->ndim(),this);
1235             Slice s[3] = {s0,s1,Slice(k,k,0)};
1236             return SliceTensor<T>(*this,s);
1237         }
1238 
1239         /// Return a 1d SliceTensor that views the specified range of the 3d Tensor
1240 
1241         /// @return SliceTensor viewing patch of original tensor
1242         SliceTensor<T> operator()(long i, long j, const Slice& s2) {
1243             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1244                           this->ndim(),this);
1245             Slice s[3] = {Slice(i,i,0),Slice(j,j,0),s2};
1246             return SliceTensor<T>(*this,s);
1247         }
1248 
1249         /// Return a 1d constant Tensor that views the specified range of the 3d Tensor
1250 
1251         /// @return Constant Tensor viewing patch of original tensor
1252         const Tensor<T> operator()(long i, long j, const Slice& s2) const {
1253             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1254                           this->ndim(),this);
1255             Slice s[3] = {Slice(i,i,0),Slice(j,j,0),s2};
1256             return SliceTensor<T>(*this,s);
1257         }
1258 
1259         /// Return a 1d SliceTensor that views the specified range of the 3d Tensor
1260 
1261         /// @return SliceTensor viewing patch of original tensor
1262         SliceTensor<T> operator()(long i, const Slice& s1, long k) {
1263             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1264                           this->ndim(),this);
1265             Slice s[3] = {Slice(i,i,0),s1,Slice(k,k,0)};
1266             return SliceTensor<T>(*this,s);
1267         }
1268 
1269         /// Return a 1d constant Tensor that views the specified range of the 3d Tensor
1270 
1271         /// @return Constant Tensor viewing patch of original tensor
1272         const Tensor<T> operator()(long i, const Slice& s1, long k) const {
1273             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1274                           this->ndim(),this);
1275             Slice s[3] = {Slice(i,i,0),s1,Slice(k,k,0)};
1276             return SliceTensor<T>(*this,s);
1277         }
1278 
1279         /// Return a 1d SliceTensor that views the specified range of the 3d Tensor
1280 
1281         /// @return SliceTensor viewing patch of original tensor
1282         SliceTensor<T> operator()(const Slice& s0, long j, long k) {
1283             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1284                           this->ndim(),this);
1285             Slice s[3] = {s0,Slice(j,j,0),Slice(k,k,0)};
1286             return SliceTensor<T>(*this,s);
1287         }
1288 
1289         /// Return a 1d constant Tensor that views the specified range of the 3d Tensor
1290 
1291         /// @return Constant Tensor viewing patch of original tensor
1292         const Tensor<T> operator()(const Slice& s0, long j, long k) const {
1293             TENSOR_ASSERT(this->ndim()==3,"invalid number of dimensions",
1294                           this->ndim(),this);
1295             Slice s[3] = {s0,Slice(j,j,0),Slice(k,k,0)};
1296             return SliceTensor<T>(*this,s);
1297         }
1298 
1299         /// Return a 1-4d SliceTensor that views the specified range of the 4d Tensor
1300 
1301         /// @return SliceTensor viewing patch of original tensor
1302         SliceTensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2,
1303                                   const Slice& s3) {
1304             TENSOR_ASSERT(this->ndim()==4,"invalid number of dimensions",
1305                           this->ndim(),this);
1306             Slice s[4] = {s0,s1,s2,s3};
1307             return SliceTensor<T>(*this,s);
1308         }
1309 
1310         /// Return a 1-4d constant Tensor that views the specified range of the 4d Tensor
1311 
1312         /// @return Constant Tensor viewing patch of original tensor
1313         const Tensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2,
1314                                   const Slice& s3) const {
1315             TENSOR_ASSERT(this->ndim()==4,"invalid number of dimensions",
1316                           this->ndim(),this);
1317             Slice s[4] = {s0,s1,s2,s3};
1318             return SliceTensor<T>(*this,s);
1319         }
1320 
1321         /// Return a 1-5d SliceTensor that views the specified range of the 5d Tensor
1322 
1323         /// @return SliceTensor viewing patch of original tensor
1324         SliceTensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2,
1325                                   const Slice& s3, const Slice& s4) {
1326             TENSOR_ASSERT(this->ndim()==5,"invalid number of dimensions",
1327                           this->ndim(),this);
1328             Slice s[5] = {s0,s1,s2,s3,s4};
1329             return SliceTensor<T>(*this,s);
1330         }
1331 
1332         /// Return a 1-5d constant Tensor that views the specified range of the 5d Tensor
1333 
1334         /// @return Constant Tensor viewing patch of original tensor
1335         const Tensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2,
1336                                   const Slice& s3, const Slice& s4) const {
1337             TENSOR_ASSERT(this->ndim()==5,"invalid number of dimensions",
1338                           this->ndim(),this);
1339             Slice s[5] = {s0,s1,s2,s3,s4};
1340             return SliceTensor<T>(*this,s);
1341         }
1342 
1343         /// Return a 1-6d SliceTensor that views the specified range of the 6d Tensor
1344 
1345         /// @return SliceTensor viewing patch of original tensor
1346         SliceTensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2,
1347                                   const Slice& s3, const Slice& s4, const Slice& s5) {
1348             TENSOR_ASSERT(this->ndim()==6,"invalid number of dimensions",
1349                           this->ndim(),this);
1350             Slice s[6] = {s0,s1,s2,s3,s4,s5};
1351             return SliceTensor<T>(*this,s);
1352         }
1353 
1354 
1355         /// Return a 1-6d constant Tensor that views the specified range of the 6d Tensor
1356 
1357         /// @return Constant Tensor viewing patch of original tensor
1358         const Tensor<T> operator()(const Slice& s0, const Slice& s1, const Slice& s2,
1359                                   const Slice& s3, const Slice& s4, const Slice& s5) const {
1360             TENSOR_ASSERT(this->ndim()==6,"invalid number of dimensions",
1361                           this->ndim(),this);
1362             Slice s[6] = {s0,s1,s2,s3,s4,s5};
1363             return SliceTensor<T>(*this,s);
1364         }
1365 
1366         /// Returns new view/tensor reshaping size/number of dimensions to conforming tensor
1367 
1368         /// @param[in] ndimnew Number of dimensions in the result
1369         /// @param[in] d Array containing size of each new dimension
1370         /// @return New tensor (viewing same underlying data as the original but with different shape)
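        ///
        /// A minimal sketch (assumes \c a views a contiguous block, as a
        /// freshly constructed tensor does, and that the total size is unchanged):
        /// \code
        /// Tensor<double> a(3,4,5);           // 60 contiguous elements
        /// long d[2] = {6,10};
        /// Tensor<double> b = a.reshape(2,d); // b(6,10) views a's data
        /// b(0,0) = 99.0;                     // also changes a(0,0,0)
        /// \endcode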
1371         Tensor<T> reshape(int ndimnew, const long* d) {
1372             Tensor<T> result(*this);
1373             result.reshape_inplace(ndimnew,d);
1374             return result;
1375         }
1376 
1377         /// Returns new view/tensor reshaping size/number of dimensions to conforming tensor
1378 
1379         /// @param[in] ndimnew Number of dimensions in the result
1380         /// @param[in] d Array containing size of each new dimension
1381         /// @return New tensor (viewing same underlying data as the original but with different shape)
1382         const Tensor<T> reshape(int ndimnew, const long* d) const {
1383             Tensor<T> result(*const_cast<Tensor<T>*>(this));
1384             result.reshape_inplace(ndimnew,d);
1385             return result;
1386         }
1387 
1388         /// Returns new view/tensor reshaping size/number of dimensions to conforming tensor
1389 
1390         /// @param[in] d Array containing size of each new dimension
1391         /// @return New tensor (viewing same underlying data as the original but with different shape)
1392         Tensor<T> reshape(const std::vector<long>& d) {
            return reshape(d.size(), d.size() ? &d[0] : 0);
1394         }
1395 
1396         /// Returns new view/tensor reshaping size/number of dimensions to conforming tensor
1397 
1398         /// @param[in] d Array containing size of each new dimension
1399         /// @return New tensor (viewing same underlying data as the original but with different shape)
1400         const Tensor<T> reshape(const std::vector<long>& d) const {
1401             return reshape(d.size(), d.size() ? &d[0] : 0);
1402         }
1403 
        /// Returns new view/tensor reshaping to conforming 1-d tensor with given dimension
1405 
1406         /// @param[in] dim0 Size of new dimension 0
1407         /// @return New tensor (viewing same underlying data as the original but with different shape)
1408         Tensor<T> reshape(long dim0) {
1409             long d[1] = {dim0};
1410             return reshape(1,d);
1411         }
        /// Returns new view/tensor reshaping to conforming 1-d tensor with given dimension
1413 
1414         /// @param[in] dim0 Size of new dimension 0
1415         /// @return New tensor (viewing same underlying data as the original but with different shape)
1416         const Tensor<T> reshape(long dim0) const {
1417             long d[1] = {dim0};
1418             return reshape(1,d);
1419         }
1420 
        /// Returns new view/tensor reshaping to conforming 2-d tensor with given dimensions
1422 
1423         /// @param[in] dim0 Size of new dimension 0
1424         /// @param[in] dim1 Size of new dimension 1
1425         /// @return New tensor (viewing same underlying data as the original but with different shape)
1426         Tensor<T> reshape(long dim0, long dim1) {
1427             long d[2] = {dim0,dim1};
1428             return reshape(2,d);
1429         }
1430 
        /// Returns new view/tensor reshaping to conforming 2-d tensor with given dimensions
1432 
1433         /// @param[in] dim0 Size of new dimension 0
1434         /// @param[in] dim1 Size of new dimension 1
1435         /// @return New tensor (viewing same underlying data as the original but with different shape)
1436         const Tensor<T> reshape(long dim0, long dim1) const {
1437             long d[2] = {dim0,dim1};
1438             return reshape(2,d);
1439         }
1440 
        /// Returns new view/tensor reshaping to conforming 3-d tensor with given dimensions
1442 
1443         /// @param[in] dim0 Size of new dimension 0
1444         /// @param[in] dim1 Size of new dimension 1
1445         /// @param[in] dim2 Size of new dimension 2
1446         /// @return New tensor (viewing same underlying data as the original but with different shape)
1447         Tensor<T> reshape(long dim0, long dim1, long dim2) {
1448             long d[3] = {dim0,dim1,dim2};
1449             return reshape(3,d);
1450         }
1451 
        /// Returns new view/tensor reshaping to conforming 3-d tensor with given dimensions
1453 
1454         /// @param[in] dim0 Size of new dimension 0
1455         /// @param[in] dim1 Size of new dimension 1
1456         /// @param[in] dim2 Size of new dimension 2
1457         /// @return New tensor (viewing same underlying data as the original but with different shape)
1458         const Tensor<T> reshape(long dim0, long dim1, long dim2) const {
1459             long d[3] = {dim0,dim1,dim2};
1460             return reshape(3,d);
1461         }
1462 
        /// Returns new view/tensor reshaping to conforming 4-d tensor with given dimensions
1464 
1465         /// @param[in] dim0 Size of new dimension 0
1466         /// @param[in] dim1 Size of new dimension 1
1467         /// @param[in] dim2 Size of new dimension 2
1468         /// @param[in] dim3 Size of new dimension 3
1469         /// @return New tensor (viewing same underlying data as the original but with different shape)
1470         Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3) {
1471             long d[4] = {dim0,dim1,dim2,dim3};
1472             return reshape(4,d);
1473         }
1474 
        /// Returns new view/tensor reshaping to conforming 4-d tensor with given dimensions
1476 
1477         /// @param[in] dim0 Size of new dimension 0
1478         /// @param[in] dim1 Size of new dimension 1
1479         /// @param[in] dim2 Size of new dimension 2
1480         /// @param[in] dim3 Size of new dimension 3
1481         /// @return New tensor (viewing same underlying data as the original but with different shape)
1482         const Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3) const {
1483             long d[4] = {dim0,dim1,dim2,dim3};
1484             return reshape(4,d);
1485         }
1486 
        /// Returns new view/tensor reshaping to conforming 5-d tensor with given dimensions
1488 
1489         /// @param[in] dim0 Size of new dimension 0
1490         /// @param[in] dim1 Size of new dimension 1
1491         /// @param[in] dim2 Size of new dimension 2
1492         /// @param[in] dim3 Size of new dimension 3
1493         /// @param[in] dim4 Size of new dimension 4
1494         /// @return New tensor (viewing same underlying data as the original but with different shape)
1495         Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3, long dim4) {
1496             long d[5] = {dim0,dim1,dim2,dim3,dim4};
1497             return reshape(5,d);
1498         }
1499 
        /// Returns new view/tensor reshaping to conforming 5-d tensor with given dimensions
1501 
1502         /// @param[in] dim0 Size of new dimension 0
1503         /// @param[in] dim1 Size of new dimension 1
1504         /// @param[in] dim2 Size of new dimension 2
1505         /// @param[in] dim3 Size of new dimension 3
1506         /// @param[in] dim4 Size of new dimension 4
1507         /// @return New tensor (viewing same underlying data as the original but with different shape)
1508         const Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3, long dim4) const {
1509             long d[5] = {dim0,dim1,dim2,dim3,dim4};
1510             return reshape(5,d);
1511         }
1512 
        /// Returns new view/tensor reshaping to conforming 6-d tensor with given dimensions
1514 
1515         /// @param[in] dim0 Size of new dimension 0
1516         /// @param[in] dim1 Size of new dimension 1
1517         /// @param[in] dim2 Size of new dimension 2
1518         /// @param[in] dim3 Size of new dimension 3
1519         /// @param[in] dim4 Size of new dimension 4
1520         /// @param[in] dim5 Size of new dimension 5
1521         /// @return New tensor (viewing same underlying data as the original but with different shape)
1522         Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3, long dim4, long dim5) {
1523             long d[6] = {dim0,dim1,dim2,dim3,dim4,dim5};
1524             return reshape(6,d);
1525         }
1526 
        /// Returns new view/tensor reshaping to conforming 6-d tensor with given dimensions
1528 
1529         /// @param[in] dim0 Size of new dimension 0
1530         /// @param[in] dim1 Size of new dimension 1
1531         /// @param[in] dim2 Size of new dimension 2
1532         /// @param[in] dim3 Size of new dimension 3
1533         /// @param[in] dim4 Size of new dimension 4
1534         /// @param[in] dim5 Size of new dimension 5
1535         /// @return New tensor (viewing same underlying data as the original but with different shape)
1536         const Tensor<T> reshape(long dim0, long dim1, long dim2, long dim3, long dim4, long dim5) const {
1537             long d[6] = {dim0,dim1,dim2,dim3,dim4,dim5};
1538             return reshape(6,d);
1539         }
1540 
        /// Returns new view/tensor reshaping to flat (1-d) tensor
1542         Tensor<T> flat() {
1543             long d[1] = {_size};
1544             return reshape(1,d);
1545         }
1546 
        /// Returns new view/tensor reshaping to flat (1-d) tensor
1548         const Tensor<T> flat() const {
1549             long d[1] = {_size};
1550             return reshape(1,d);
1551         }
1552 
1553         /// Returns new view/tensor splitting dimension \c i as \c dimi0*dimi1 to produce conforming d+1 dimension tensor
1554 
1555         /// @return New tensor (viewing same underlying data as the original but with additional dimensions)
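        ///
        /// A sketch (assumes \c dimi0*dimi1 equals the size of dimension \c i ):
        /// \code
        /// Tensor<double> a(6,10);
        /// Tensor<double> b = a.splitdim(0,2,3); // dims now (2,3,10)
        /// \endcode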
1556         Tensor<T> splitdim(long i, long dimi0, long dimi1) {
1557             Tensor<T> result(*this);
1558             result.splitdim_inplace(i, dimi0, dimi1);
1559             return result;
1560         }
1561 
1562         /// Returns new view/tensor splitting dimension \c i as \c dimi0*dimi1 to produce conforming d+1 dimension tensor
1563 
1564         /// @return New tensor (viewing same underlying data as the original but with additional dimensions)
1565         const Tensor<T> splitdim(long i, long dimi0, long dimi1) const {
1566             Tensor<T> result(*const_cast<Tensor<T>*>(this));
1567             result.splitdim_inplace(i, dimi0, dimi1);
1568             return result;
1569         }
1570 
1571         /// Returns new view/tensor fusing contiguous dimensions \c i and \c i+1
1572 
1573         /// @return New tensor (viewing same underlying data as the original but with fewer dimensions)
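        ///
        /// A sketch (dimensions \c i and \c i+1 are replaced by a single
        /// dimension of their product):
        /// \code
        /// Tensor<double> a(2,3,10);
        /// Tensor<double> b = a.fusedim(0);  // dims now (6,10)
        /// \endcode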
1574         Tensor<T> fusedim(long i) {
1575             Tensor<T> result(*this);
1576             result.fusedim_inplace(i);
1577             return result;
1578         }
1579 
1580         /// Returns new view/tensor fusing contiguous dimensions \c i and \c i+1
1581 
1582         /// @return New tensor (viewing same underlying data as the original but with fewer dimensions)
1583         const Tensor<T> fusedim(long i) const {
1584             Tensor<T> result(*const_cast<Tensor<T>*>(this));
1585             result.fusedim_inplace(i);
1586             return result;
1587         }
1588 
        /// Returns new view/tensor swapping dimensions \c idim and \c jdim
1590 
1591         /// @return New tensor (viewing same underlying data as the original but with reordered dimensions)
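        ///
        /// E.g., a transposed view of a matrix without copying any data:
        /// \code
        /// Tensor<double> a(3,4);
        /// Tensor<double> at = a.swapdim(0,1); // at(j,i) aliases a(i,j)
        /// \endcode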
1592         Tensor<T> swapdim(long idim, long jdim) {
1593             Tensor<T> result(*this);
1594             result.swapdim_inplace(idim, jdim);
1595             return result;
1596         }
1597 
        /// Returns new view/tensor swapping dimensions \c idim and \c jdim
1599 
1600         /// @return New tensor (viewing same underlying data as the original but with reordered dimensions)
1601         const Tensor<T> swapdim(long idim, long jdim) const {
1602             Tensor<T> result(*const_cast<Tensor<T>*>(this));
1603             result.swapdim_inplace(idim, jdim);
1604             return result;
1605         }
1606 
1607         /// Returns new view/tensor permuting the dimensions
1608 
1609         /// @param[in] map Old dimension i becomes new dimension \c map[i]
1610         /// @return New tensor (viewing same underlying data as the original but with reordered dimensions)
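        ///
        /// A sketch of the mapping convention stated above:
        /// \code
        /// Tensor<double> a(3,4,5);
        /// std::vector<long> map = {2,0,1};  // old dim i -> new dim map[i]
        /// Tensor<double> b = a.mapdim(map); // dims now (4,5,3)
        /// \endcode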
1611         Tensor<T> mapdim(const std::vector<long>& map) {
1612             Tensor<T> result(*this);
1613             result.mapdim_inplace(map);
1614             return result;
1615         }
1616 
1617         /// Returns new view/tensor permuting the dimensions
1618 
1619         /// @return New tensor (viewing same underlying data as the original but with reordered dimensions)
1620         const Tensor<T> mapdim(const std::vector<long>& map) const {
1621             Tensor<T> result(*const_cast<Tensor<T>*>(this));
1622             result.mapdim_inplace(map);
1623             return result;
1624         }
1625 
1626 
        /// Returns new view/tensor cycling the sub-dimensions \c (start,...,end) by \c nshift steps
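        ///
        /// Used, e.g., by \c transform_dir() to rotate a transformed dimension
        /// back into place.  A sketch (with one positive shift the last
        /// dimension of the range moves to the front of the range):
        /// \code
        /// Tensor<double> a(2,3,4);
        /// Tensor<double> b = a.cycledim(1,0,-1); // dims now (4,2,3)
        /// \endcode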
1628         Tensor<T> cycledim(long nshift, long start, long end) {
1629             Tensor<T> result(*this);
1630             result.cycledim_inplace(nshift, start, end);
1631             return result;
1632         }
1633 
1634 
        /// Returns new view/tensor cycling the sub-dimensions \c (start,...,end) by \c nshift steps
1636         const Tensor<T> cycledim(long nshift, long start, long end) const {
1637             Tensor<T> result(*const_cast<Tensor<T>*>(this));
1638             result.cycledim_inplace(nshift, start, end);
1639             return result;
1640         }
1641 
1642 
1643         /// Test if \c *this and \c t conform.
1644         template <class Q> bool conforms(const Tensor<Q>& t) const {
1645             return BaseTensor::conforms(&t);
1646         }
1647 
1648         /// Returns the sum of all elements of the tensor
1649         T sum() const {
1650             T result = 0;
1651             UNARY_OPTIMIZED_ITERATOR(const T,(*this),result += *_p0);
1652             return result;
1653         }
1654 
1655         /// Returns the sum of the squares of the elements
1656         T sumsq() const {
1657             T result = 0;
1658             UNARY_OPTIMIZED_ITERATOR(const T,(*this),result += (*_p0) * (*_p0));
1659             return result;
1660         }
1661 
1662         /// Return the product of all elements of the tensor
1663         T product() const {
1664             T result = 1;
1665             UNARY_OPTIMIZED_ITERATOR(const T,(*this),result *= *_p0);
1666             return result;
1667         }
1668 
1669         /// Return the minimum value (and if ind is non-null, its index) in the Tensor
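        ///
        /// E.g., to recover both the value and the location of the minimum,
        /// a sketch:
        /// \code
        /// Tensor<double> a(3,4,5);
        /// long ind[TENSOR_MAXDIM];
        /// double lo = a.min(ind);   // a(ind[0],ind[1],ind[2]) == lo
        /// \endcode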
1670         T min(long* ind=0) const {
1671             T result = *(this->_p);
1672             if (ind) {
1673                 for (long i=0; i<_ndim; ++i) ind[i]=0;
1674                 long nd = _ndim-1;
1675                 UNARY_UNOPTIMIZED_ITERATOR(const T,(*this),
1676                                            if (result > *_p0) {
1677                                                result = *_p0;
1678                                                for (long i=0; i<nd; ++i) ind[i]=iter.ind[i];
1679                                                ind[nd] = _j;
1680                                            }
1681                                            );
1682             }
1683             else {
1684                 UNARY_OPTIMIZED_ITERATOR(const T,(*this),result=std::min<T>(result,*_p0));
1685             }
1686             return result;
1687         }
1688 
1689         /// Return the maximum value (and if ind is non-null, its index) in the Tensor
1690         T max(long* ind=0) const {
1691             T result = *(this->_p);
1692             if (ind) {
1693                 for (long i=0; i<_ndim; ++i) ind[i]=0;
1694                 long nd = _ndim-1;
1695                 UNARY_UNOPTIMIZED_ITERATOR(const T,(*this),
1696                                            if (result < *_p0) {
1697                                                result = *_p0;
1698                                                for (long i=0; i<nd; ++i) ind[i]=iter.ind[i];
1699                                                ind[nd] = _j;
1700                                            }
1701                                            );
1702             }
1703             else {
1704                 UNARY_OPTIMIZED_ITERATOR(const T,(*this),result=std::max<T>(result,*_p0));
1705             }
1706             return result;
1707         }
1708 
1709         // For complex types, this next group returns the appropriate real type
1710         // For real types, the same type as T is returned (type_data.h)
1711 
1712         /// Returns the Frobenius norm of the tensor
1713         float_scalar_type normf() const {
1714             float_scalar_type result = 0;
1715             UNARY_OPTIMIZED_ITERATOR(const T,(*this),result += ::madness::detail::mynorm(*_p0));
1716             return (float_scalar_type) std::sqrt(result);
1717         }
1718 
1719         /// Return the absolute minimum value (and if ind is non-null, its index) in the Tensor
1720         scalar_type absmin(long *ind = 0) const {
1721             scalar_type result = std::abs(*(this->_p));
1722             if (ind) {
1723                 for (long i=0; i<_ndim; ++i) ind[i]=0;
1724                 long nd = _ndim-1;
1725                 UNARY_UNOPTIMIZED_ITERATOR(const T,(*this),
1726                                            scalar_type absval = std::abs(*_p0);
1727                                            if (result > absval) {
1728                                                result = absval;
1729                                                for (long i=0; i<nd; ++i) ind[i]=iter.ind[i];
1730                                                ind[nd] = _j;
1731                                            }
1732                                            );
1733             }
1734             else {
1735                 UNARY_OPTIMIZED_ITERATOR(const T,(*this),result=std::min<scalar_type>(result,std::abs(*_p0)));
1736             }
1737             return result;
1738         }
1739 
1740         /// Return the absolute maximum value (and if ind is non-null, its index) in the Tensor
1741         scalar_type absmax(long *ind = 0) const {
1742             scalar_type result = std::abs(*(this->_p));
1743             if (ind) {
1744                 for (long i=0; i<_ndim; ++i) ind[i]=0;
1745                 long nd = _ndim-1;
1746                 UNARY_UNOPTIMIZED_ITERATOR(T,(*this),
1747                                            scalar_type absval = std::abs(*_p0);
1748                                            if (result < absval) {
1749                                                result = absval;
1750                                                for (long i=0; i<nd; ++i) ind[i]=iter.ind[i];
1751                                                ind[nd] = _j;
1752                                            }
1753                                            );
1754             }
1755             else {
1756                 UNARY_OPTIMIZED_ITERATOR(const T,(*this),result=std::max<scalar_type>(result,std::abs(*_p0)));
1757             }
1758             return result;
1759         }
1760 
1761 
1762         /// Return the trace of two tensors (no complex conjugate invoked)
1763         T trace(const Tensor<T>& t) const {
1764             T result = 0;
1765             BINARY_OPTIMIZED_ITERATOR(const T,(*this),const T,t,result += (*_p0)*(*_p1));
1766             return result;
1767         }
1768 
1769         /// Return the trace of two tensors with complex conjugate of the leftmost (i.e., this)
1770         template <class Q>
1771         TENSOR_RESULT_TYPE(T,Q) trace_conj(const Tensor<Q>& t) const {
1772             TENSOR_RESULT_TYPE(T,Q) result = 0;
1773             BINARY_OPTIMIZED_ITERATOR(const T,(*this),const Q,t,result += conditional_conj(*_p0)*(*_p1));
1774             return result;
1775         }
1776 
1777         /// Inplace apply a unary function to each element of the tensor
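        ///
        /// E.g., squaring each element in place (note that \c op is taken by
        /// non-const reference, so pass an lvalue):
        /// \code
        /// Tensor<double> a(3,3);
        /// auto sq = [](double x) { return x*x; };
        /// a.unaryop(sq);
        /// \endcode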
1778         template <typename opT>
1779         Tensor<T>& unaryop(opT& op) {
1780             UNARY_OPTIMIZED_ITERATOR(T,(*this),*_p0=op(*_p0));
1781             return *this;
1782         }
1783 
1784         /// Inplace multiply by corresponding elements of argument Tensor
1785         Tensor<T>& emul(const Tensor<T>& t) {
1786             BINARY_OPTIMIZED_ITERATOR(T,(*this),const T,t,*_p0 *= *_p1);
1787             return *this;
1788         }
1789 
1790         /// Inplace generalized saxpy ... this = this*alpha + other*beta
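        ///
        /// E.g., accumulating a scaled tensor (a sketch; \c a and \c b must conform):
        /// \code
        /// Tensor<double> a(3,3), b(3,3);
        /// a.gaxpy(1.0, b, 2.0);   // a <- a + 2*b
        /// \endcode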
1791         Tensor<T>& gaxpy(T alpha, const Tensor<T>& t, T beta) {
1792             if (iscontiguous() && t.iscontiguous()) {
1793                 T* MADNESS_RESTRICT a = ptr();
1794                 const T* MADNESS_RESTRICT b = t.ptr();
1795                 if (alpha == T(1.0)) {
1796                     for (long i=0; i<_size; ++i) a[i] += b[i]*beta;
1797                 }
1798                 else {
1799                     for (long i=0; i<_size; ++i) a[i] = a[i]*alpha + b[i]*beta;
1800                 }
1801             }
1802             else {
1803                 //BINARYITERATOR(T,(*this),T,t, (*_p0) = alpha*(*_p0) + beta*(*_p1));
1804                 BINARY_OPTIMIZED_ITERATOR(T,(*this),const T,t, (*_p0) = alpha*(*_p0) + beta*(*_p1));
1805                 //ITERATOR((*this),(*this)(IND) = alpha*(*this)(IND) + beta*t(IND));
1806             }
1807             return *this;
1808         }
1809 
1810         /// Returns a pointer to the internal data
1811         T* ptr() {
1812             return _p;
1813         }
1814 
1815         /// Returns a pointer to the internal data
1816         const T* ptr() const {
1817             return _p;
1818         }
1819 
1820         /// Returns a pointer to the base class
1821         BaseTensor* base() {
1822             return static_cast<BaseTensor*>(this);
1823         }
1824 
1825         /// Returns a pointer to the base class
1826         const BaseTensor* base() const {
            return static_cast<const BaseTensor*>(this);
1828         }
1829 
1830         /// Return iterator over single tensor
1831         TensorIterator<T> unary_iterator(long iterlevel=0,
1832                                          bool optimize=true,
1833                                          bool fusedim=true,
1834                                          long jdim=default_jdim) const {
1835             return TensorIterator<T>(this,(const Tensor<T>*) 0, (const Tensor<T>*) 0,
1836                                      iterlevel, optimize, fusedim, jdim);
1837         }
1838 
1839         /// Return iterator over two tensors
1840         template <class Q>
1841         TensorIterator<T,Q> binary_iterator(const Tensor<Q>& q,
1842                                             long iterlevel=0,
1843                                             bool optimize=true,
1844                                             bool fusedim=true,
1845                                             long jdim=default_jdim) const {
1846             return TensorIterator<T,Q>(this,&q,(const Tensor<T>*) 0,
1847                                        iterlevel, optimize, fusedim, jdim);
1848         }
1849 
1850         /// Return iterator over three tensors
1851         template <class Q, class R>
1852         TensorIterator<T,Q,R> ternary_iterator(const Tensor<Q>& q,
1853                                                const Tensor<R>& r,
1854                                                long iterlevel=0,
1855                                                bool optimize=true,
1856                                                bool fusedim=true,
1857                                                long jdim=default_jdim) const {
1858             return TensorIterator<T,Q,R>(this,&q,&r,
1859                                          iterlevel, optimize, fusedim, jdim);
1860         }
1861 
1862         /// End point for forward iteration
1863         const TensorIterator<T>& end() const {
1864             static TensorIterator<T> theend(0,0,0,0,0,0);
1865             return theend;
1866         }
1867 
1868         virtual ~Tensor() {}
1869 
        /// Frees all memory and resets to the state of the default constructor
1871         void clear() {deallocate();}
1872 
        bool has_data() const {return size()!=0;}
1874 
1875     };
1876 
1877     template <class T>
1878     std::ostream& operator << (std::ostream& out, const Tensor<T>& t);
1879 
1880 
1881     namespace archive {
1882         /// Serialize a tensor
1883         template <class Archive, typename T>
1884         struct ArchiveStoreImpl< Archive, Tensor<T> > {
1885             static void store(const Archive& s, const Tensor<T>& t) {
1886                 if (t.iscontiguous()) {
1887                     s & t.size() & t.id();
1888                     if (t.size()) s & t.ndim() & wrap(t.dims(),TENSOR_MAXDIM) & wrap(t.ptr(),t.size());
1889                 }
1890                 else {
1891                     s & copy(t);
1892                 }
1893             };
1894         };
1895 
1896 
1897         /// Deserialize a tensor ... existing tensor is replaced
1898         template <class Archive, typename T>
1899         struct ArchiveLoadImpl< Archive, Tensor<T> > {
1900             static void load(const Archive& s, Tensor<T>& t) {
1901                 long sz = 0l, id = 0l;
1902                 s & sz & id;
1903                 if (id != t.id()) throw "type mismatch deserializing a tensor";
1904                 if (sz) {
1905                     long _ndim = 0l, _dim[TENSOR_MAXDIM];
1906                     s & _ndim & wrap(_dim,TENSOR_MAXDIM);
1907                     t = Tensor<T>(_ndim, _dim, false);
1908                     if (sz != t.size()) throw "size mismatch deserializing a tensor";
1909                     s & wrap(t.ptr(), t.size());
1910                 }
1911                 else {
1912                     t = Tensor<T>();
1913                 }
1914             };
1915         };
1916 
1917     }
1918 
    /// The class defines tensor op scalar ... here we define scalar op tensor.
1920 
1921     /// \ingroup tensor
1922     template <typename T, typename Q>
1923     typename IsSupported < TensorTypeData<Q>, Tensor<T> >::type
1924     operator+(Q x, const Tensor<T>& t) {
1925         return t+x;
1926     }
1927 
    /// The class defines tensor op scalar ... here we define scalar op tensor.
1929 
1930     /// \ingroup tensor
1931     template <typename T, typename Q>
1932     typename IsSupported < TensorTypeData<Q>, Tensor<T> >::type
1933     operator*(const Q& x, const Tensor<T>& t) {
1934         return t*x;
1935     }
1936 
    /// The class defines tensor op scalar ... here we define scalar op tensor.
1938 
1939     /// \ingroup tensor
1940     template <typename T, typename Q>
1941     typename IsSupported < TensorTypeData<Q>, Tensor<T> >::type
1942     operator-(Q x, const Tensor<T>& t) {
1943         return (-t)+=x;
1944     }
1945 
1946     /// Returns a new contiguous tensor that is a deep copy of the input
1947 
1948     /// \ingroup tensor
    /// @return A new contiguous tensor that is a deep copy of the input
1950     template <class T> Tensor<T> copy(const Tensor<T>& t) {
1951         if (t.size()) {
1952             Tensor<T> result = Tensor<T>(t.ndim(),t.dims(),false);
1953             BINARY_OPTIMIZED_ITERATOR(T, result, const T, t, *_p0 = *_p1);
1954             return result;
1955         }
1956         else {
1957             return Tensor<T>();
1958         }
1959     }
1960 
1961     /// Returns a new contiguous tensor of type Q that is a deep copy of the input
1962 
1963     /// \ingroup tensor
    /// @return A new contiguous tensor of type \c Q that is a deep copy of the input
1965     template <class Q, class T>
1966     Tensor<Q> convert(const Tensor<T>& t) {
1967         if (t.size()) {
1968             Tensor<Q> result = Tensor<Q>(t.ndim(),t.dims(),false);
1969             BINARY_OPTIMIZED_ITERATOR(Q, result, const T, t, *_p0 = *_p1);
1970             return result;
1971         }
1972         else {
1973             return Tensor<Q>();
1974         }
1975     }
1976 
1977 
1978     /// Transforms one dimension of the tensor t by the matrix c, returns new contiguous tensor
1979 
1980     /// \ingroup tensor
1981     /// \code
1982     /// transform_dir(t,c,1) = r(i,j,k,...) = sum(j') t(i,j',k,...) * c(j',j)
1983     /// \endcode
1984     /// @param[in] t Tensor to transform (size of dimension to be transformed must match size of first dimension of \c c )
1985     /// @param[in] c Matrix used for the transformation
1986     /// @param[in] axis Dimension (or axis) to be transformed
    /// @return A new, contiguous tensor
1988     template <class T, class Q>
1989     Tensor<TENSOR_RESULT_TYPE(T,Q)> transform_dir(const Tensor<T>& t, const Tensor<Q>& c, int axis) {
1990         if (axis == 0) {
1991             return inner(c,t,0,axis);
1992         }
1993         else if (axis == t.ndim()-1) {
1994             return inner(t,c,axis,0);
1995         }
1996         else {
1997             return copy(inner(t,c,axis,0).cycledim(1,axis, -1)); // Copy to make contiguous
1998         }
1999     }
2000 
2001     /// Returns a new deep copy of the transpose of the input tensor
2002 
2003     /// \ingroup tensor
2004     template <class T>
2005     Tensor<T> transpose(const Tensor<T>& t) {
2006         TENSOR_ASSERT(t.ndim() == 2, "transpose requires a matrix", t.ndim(), &t);
2007         return copy(t.swapdim(0,1));
2008     }
2009 
2010     /// Returns a new deep copy of the complex conjugate transpose of the input tensor
2011 
2012     /// \ingroup tensor
2013     template <class T>
2014     Tensor<T> conj_transpose(const Tensor<T>& t) {
2015         TENSOR_ASSERT(t.ndim() == 2, "conj_transpose requires a matrix", t.ndim(), &t);
2016         return conj(t.swapdim(0,1));
2017     }
2018 
2019     /// Indexing a non-constant tensor with slices returns a SliceTensor
2020 
2021     /// \ingroup tensor
    /// A slice tensor differs from a tensor only in that assignment
    /// copies the data into the viewed patch of the original tensor
    /// rather than generating an entirely new copy.  You will usually
    /// not instantiate one except as a temporary produced by indexing
    /// a tensor with slices and then assigning it back to a tensor, or
    /// performing some other operation and discarding the result.
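    ///
    /// A typical use, as a sketch (here \c _ denotes the whole-dimension
    /// slice defined in slice.h):
    /// \code
    /// Tensor<double> a(4,4), b(2,4);
    /// a(Slice(0,1),_) = b;    // copy b into the first two rows of a
    /// a(Slice(2,3),_) = 1.0;  // fill the last two rows of a with 1
    /// \endcode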
2028     template <class T> class SliceTensor : public Tensor<T> {
2029     private:
2030         SliceTensor<T>();
2031 
2032     public:
2033         SliceTensor(const Tensor<T>& t, const Slice s[])
2034             : Tensor<T>(const_cast<Tensor<T>&>(t)) //!!!!!!!!!!!
2035         {
            // Members of a dependent (templated) base class are not found by
            // unqualified lookup ... they must be referenced explicitly (e.g., this->_dim).
2038 
2039             long nd = 0, size=1;
2040             for (long i=0; i<t._ndim; ++i) {
2041                 long start=s[i].start, end=s[i].end, step=s[i].step;
2042                 //std::printf("%ld input start=%ld end=%ld step=%ld\n",
2043                 //i, start, end, step);
2044                 if (start < 0) start += this->_dim[i];
2045                 if (end < 0) end += this->_dim[i];
2046                 long len = end-start+1;
2047                 if (step) len /= step;	// Rounds len towards zero
2048 
2049                 // if input length is not exact multiple of step, round end towards start
2050                 // for the same behaviour of for (i=start; i<=end; i+=step);
2051                 end = start + (len-1)*step;
2052 
2053                 //std::printf("%ld munged start=%ld end=%ld step=%ld len=%ld _dim=%ld\n",
2054                 //		i, start, end, step, len, this->_dim[i]);
2055 
2056                 TENSOR_ASSERT(start>=0 && start<this->_dim[i],"slice start invalid",start,this);
2057                 TENSOR_ASSERT(end>=0 && end<this->_dim[i],"slice end invalid",end,this);
2058                 TENSOR_ASSERT(len>0,"slice length must be non-zero",len,this);
2059 
2060                 this->_p += start * t._stride[i];
2061 
2062                 if (step) {
2063                     size *= len;
2064                     this->_dim[nd] = len;
2065                     this->_stride[nd] = step * t._stride[i];
2066                     ++nd;
2067                 }
2068             }
2069             //For Python interface need to be able to return a scalar inside a tensor with nd=0
2070             //TENSOR_ASSERT(nd>0,"slicing produced a scalar, but cannot return one",nd,this);
2071             for (long i=nd; i<TENSOR_MAXDIM; ++i) { // So can iterate over missing dimensions
2072                 this->_dim[i] = 1;
2073                 this->_stride[i] = 0;
2074             }
2075 
2076             this->_ndim = nd;
2077             this->_size = size;
2078         }
2079 
2080         SliceTensor<T>& operator=(const SliceTensor<T>& t) {
2081             BINARY_OPTIMIZED_ITERATOR(T, (*this), const T, t, *_p0 = (T)(*_p1));
2082             return *this;
2083         }
2084 
2085         template <class Q>
2086         SliceTensor<T>& operator=(const SliceTensor<Q>& t) {
2087             BINARY_OPTIMIZED_ITERATOR(T, (*this), const Q, t, *_p0 = (T)(*_p1));
2088             return *this;
2089         }
2090 
2091         SliceTensor<T>& operator=(const Tensor<T>& t) {
2092             BINARY_OPTIMIZED_ITERATOR(T, (*this), const T, t, *_p0 = (T)(*_p1));
2093             return *this;
2094         }
2095 
2096         template <class Q>
2097         SliceTensor<T>& operator=(const Tensor<Q>& t) {
2098             BINARY_OPTIMIZED_ITERATOR(T, (*this), const Q, t, *_p0 = (T)(*_p1));
2099             return *this;
2100         }
2101 
2102         SliceTensor<T>& operator=(const T& t) {
2103             UNARY_OPTIMIZED_ITERATOR(T, (*this), *_p0 = t);
2104             return *this;
2105         }
2106 
        virtual ~SliceTensor() {}  // Tensor<T> destructor does enough
2108     };
2109 
2110 
2111     // Specializations for complex types
2112     template<> float_complex Tensor<float_complex>::min(long* ind) const ;
2113     template<> double_complex Tensor<double_complex>::min(long* ind) const ;
2114     template<> float_complex Tensor<float_complex>::max(long* ind) const ;
2115     template<> double_complex Tensor<double_complex>::max(long* ind) const ;
2116 
2117     // Stream stuff
2118 
2119     /// Print (for human consumption) a tensor to the stream
2120 
2121     /// \ingroup tensor
2122     template <class T>
2123     std::ostream& operator << (std::ostream& s, const Tensor<T>& t) {
2124         if (t.size() == 0) {
2125             s << "[empty tensor]\n";
2126             return s;
2127         }
2128 
2129         long maxdim = 0;
2130         long index_width = 0;
2131         for (int i = 0; i<(t.ndim()-1); ++i) {
2132             if (maxdim < t.dim(i)) maxdim = t.dim(i);
2133         }
2134         if (maxdim < 10)
2135             index_width = 1;
2136         else if (maxdim < 100)
2137             index_width = 2;
2138         else if (maxdim < 1000)
2139             index_width = 3;
2140         else if (maxdim < 10000)
2141             index_width = 4;
2142         else
2143             index_width = 6;
2144 
2145         std::ios::fmtflags oldflags = s.setf(std::ios::scientific);
2146         long oldprec = s.precision();
2147         long oldwidth = s.width();
2148 
2149         // C++ formatted IO is worse than Fortran !!
2150         for (TensorIterator<T> iter=t.unary_iterator(1,false,false); iter!=t.end(); ++iter) {
2151             const T* p = iter._p0;
2152             long inc = iter._s0;
2153             long dimj = iter.dimj;
2154             s.unsetf(std::ios::scientific);
2155             s << '[';
2156             for (long i=0; i<iter.ndim; ++i) {
2157                 s.width(index_width);
2158                 s << iter.ind[i];
2159                 //if (i != iter.ndim)
2160                 s << ",";
2161             }
2162             s << "*]";
2163 //flo            s.setf(std::ios::scientific);
2164             s.setf(std::ios::fixed);
2165             for (long j=0; j<dimj; ++j, p+=inc) {
2166 //flo                s.precision(4);
2167                 s << " ";
2168                 s.precision(8);
2169                 s.width(12);
2170                 s << *p;
2171             }
2172             s.unsetf(std::ios::scientific);
2173             s << std::endl;
2174         }
2175         s.setf(oldflags,std::ios::floatfield);
2176         s.precision(oldprec);
2177         s.width(oldwidth);
2178 
2179         return s;
2180     }
2181 
2182 
    /// Outer product ... result(i,j,...,p,q,...) = left(i,j,...)*right(p,q,...)
2184 
2185     /// \ingroup tensor
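    ///
    /// E.g., the outer product of two vectors is a matrix, a sketch:
    /// \code
    /// Tensor<double> x(3), y(5);
    /// Tensor<double> m = outer(x,y);  // m(i,j) = x(i)*y(j), dims (3,5)
    /// \endcode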
2186     template <class T>
2187     Tensor<T> outer(const Tensor<T>& left, const Tensor<T>& right) {
2188         long nd = left.ndim() + right.ndim();
2189         TENSOR_ASSERT(nd <= TENSOR_MAXDIM,"too many dimensions in result",
2190                       nd,0);
2191         long d[TENSOR_MAXDIM];
2192         for (long i=0; i<left.ndim(); ++i) d[i] = left.dim(i);
2193         for (long i=0; i<right.ndim(); ++i) d[i+left.ndim()] = right.dim(i);
2194         Tensor<T> result(nd,d,false);
2195         T* ptr = result.ptr();
2196 
2197         TensorIterator<T> iter=right.unary_iterator(1,false,true);
2198         for (TensorIterator<T> p=left.unary_iterator(); p!=left.end(); ++p) {
2199             T val1 = *p;
2200             // Cannot reorder dimensions, but can fuse contiguous dimensions
2201             for (iter.reset(); iter._p0; ++iter) {
2202                 long dimj = iter.dimj;
2203                 T* _p0 = iter._p0;
2204                 long Tstride = iter._s0;
2205                 for (long _j=0; _j<dimj; ++_j, _p0+=Tstride) {
2206                     *ptr++ = val1 * (*_p0);
2207                 }
2208             }
2209         }
2210 
2211         return result;
2212     }
2213 
2214 
2215     /// Inner product ... result(i,j,...,p,q,...) = sum(z) left(i,j,...,z)*right(z,p,q,...)
2216 
2217     /// \ingroup tensor
2218     /// By default it contracts the last dimension of the left tensor and
2219     /// the first dimension of the right tensor.  These defaults can be
2220     /// changed by specifying \c k0 and \c k1 , the index to contract in
2221     /// the left and right side tensors, respectively.  The defaults
2222     /// correspond to (\c k0=-1 and \c k1=0 ).
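    ///
    /// With the defaults this reduces to matrix multiplication, a sketch:
    /// \code
    /// Tensor<double> a(3,4), b(4,5);
    /// Tensor<double> c = inner(a,b);  // c(i,j) = sum(k) a(i,k)*b(k,j)
    /// \endcode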
2223     template <class T, class Q>
2224     Tensor<TENSOR_RESULT_TYPE(T,Q)> inner(const Tensor<T>& left, const Tensor<Q>& right,
2225                                           long k0=-1, long k1=0) {
2226         if (k0 < 0) k0 += left.ndim();
2227         if (k1 < 0) k1 += right.ndim();
2228         long nd = left.ndim() + right.ndim() - 2;
2229         TENSOR_ASSERT(nd!=0, "result is a scalar but cannot return one ... use dot",
2230                       nd, &left);
2231 
2232 
2233         TENSOR_ASSERT(left.dim(k0) == right.dim(k1),"common index must be same length",
2234                 	right.dim(k1), &left);
2235 
2236         TENSOR_ASSERT(nd > 0 && nd <= TENSOR_MAXDIM,
2237                       "invalid number of dimensions in the result", nd,0);
2238 
2239         long d[TENSOR_MAXDIM];
2240 
2241         long base=0;
2242         for (long i=0; i<k0; ++i) d[i] = left.dim(i);
2243         for (long i=k0+1; i<left.ndim(); ++i) d[i-1] = left.dim(i);
2244         base = left.ndim()-1;
2245         for (long i=0; i<k1; ++i) d[i+base] = right.dim(i);
2246         base--;
2247         for (long i=k1+1; i<right.ndim(); ++i) d[i+base] = right.dim(i);
2248 
2249         Tensor<TENSOR_RESULT_TYPE(T,Q)> result(nd,d);
2250 
2251         inner_result(left,right,k0,k1,result);
2252 
2253         return result;
2254     }
2255 
2256     /// Accumulate inner product into user provided, contiguous, correctly sized result tensor
2257 
2258     /// \ingroup tensor
2259     /// This routine may be used to optimize away the tensor constructor
2260     /// of the result tensor in inner loops when the result tensor may be
2261     /// reused or accumulated into.  If the user calls this routine
2262     /// directly very little checking is done since it is intended as an
2263     /// optimization for small tensors.  As far as the result goes, the
2264     /// caller is completely responsible for providing a contiguous tensor
2265     /// that has the correct dimensions and is appropriately initialized.
2266     /// The inner product is accumulated into result.
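    ///
    /// A sketch of the reuse pattern (the caller zeroes the result, since the
    /// product is accumulated into it; \c fill() is assumed to be the
    /// scalar-fill member defined earlier in Tensor):
    /// \code
    /// Tensor<double> a(3,4), b(4,5), r(3,5); // r contiguous, correct dims
    /// r.fill(0.0);
    /// inner_result(a, b, -1, 0, r);          // r += inner(a,b)
    /// \endcode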
2267     template <class T, class Q>
2268     void inner_result(const Tensor<T>& left, const Tensor<Q>& right,
2269                       long k0, long k1, Tensor< TENSOR_RESULT_TYPE(T,Q) >& result) {
2270 
2271         typedef TENSOR_RESULT_TYPE(T,Q) resultT;
2272         // Need to include explicit optimizations for common special cases
2273         // E.g., contiguous, matrix-matrix, and 3d-tensor*matrix
2274 
2275         resultT* ptr = result.ptr();
2276 
2277         if (k0 < 0) k0 += left.ndim();
2278         if (k1 < 0) k1 += right.ndim();
2279 
2280         if (left.iscontiguous() && right.iscontiguous()) {
2281             if (k0==0 && k1==0) {
2282                 // c[i,j] = a[k,i]*b[k,j] ... collapsing extra indices to i & j
2283                 long dimk = left.dim(k0);
2284                 long dimj = right.stride(0);
2285                 long dimi = left.stride(0);
2286                 mTxm(dimi,dimj,dimk,ptr,left.ptr(),right.ptr());
2287                 return;
2288             }
2289             else if (k0==(left.ndim()-1) && k1==(right.ndim()-1)) {
2290                 // c[i,j] = a[i,k]*b[j,k] ... collapsing extra indices to i & j
2291                 long dimk = left.dim(k0);
2292                 long dimi = left.size()/dimk;
2293                 long dimj = right.size()/dimk;
2294                 mxmT(dimi,dimj,dimk,ptr,left.ptr(),right.ptr());
2295                 return;
2296             }
2297             else if (k0==0 && k1==(right.ndim()-1)) {
2298                 // c[i,j] = a[k,i]*b[j,k] ... collapsing extra indices to i & j
2299                 long dimk = left.dim(k0);
2300                 long dimi = left.stride(0);
2301                 long dimj = right.size()/dimk;
2302                 mTxmT(dimi,dimj,dimk,ptr,left.ptr(),right.ptr());
2303                 return;
2304             }
2305             else if (k0==(left.ndim()-1) && k1==0) {
2306                 // c[i,j] = a[i,k]*b[k,j] ... collapsing extra indices to i & j
2307                 long dimk = left.dim(k0);
2308                 long dimi = left.size()/dimk;
2309                 long dimj = right.stride(0);
2310                 mxm(dimi,dimj,dimk,ptr,left.ptr(),right.ptr());
2311                 return;
2312             }
2313         }
2314 
2315         long dimj = left.dim(k0);
2316         TensorIterator<Q> iter1=right.unary_iterator(1,false,false,k1);
2317 
2318         for (TensorIterator<T> iter0=left.unary_iterator(1,false,false,k0);
2319                 iter0._p0; ++iter0) {
2320             T* MADNESS_RESTRICT xp0 = iter0._p0;
2321             long s0 = iter0._s0;
2322             for (iter1.reset(); iter1._p0; ++iter1) {
2323                 T* MADNESS_RESTRICT p0 = xp0;
2324                 Q* MADNESS_RESTRICT p1 = iter1._p0;
2325                 long s1 = iter1._s0;
2326                 resultT sum = 0;
2327                 for (long j=0; j<dimj; ++j,p0+=s0,p1+=s1) {
2328                     sum += (*p0) * (*p1);
2329                 }
2330                 *ptr++ += sum;
2331             }
2332         }
2333     }
2334 
2335     /// Transform all dimensions of the tensor t by the matrix c
2336 
2337     /// \ingroup tensor
2338     /// Often used to transform all dimensions from one basis to another
2339     /// \code
2340     /// result(i,j,k...) <-- sum(i',j', k',...) t(i',j',k',...) c(i',i) c(j',j) c(k',k) ...
2341     /// \endcode
2342     /// The input dimensions of \c t must all be the same and agree with
2343     /// the first dimension of \c c .  The dimensions of \c c may differ in
2344     /// size.  If the dimensions of \c c are the same, and the operation
2345     /// is being performed repeatedly, then you might consider calling \c
2346     /// fast_transform instead which enables additional optimizations and
2347     /// can eliminate all constructor overhead and improve cache locality.
2348     ///
2349     template <class T, class Q>
2350     Tensor<TENSOR_RESULT_TYPE(T,Q)> transform(const Tensor<T>& t, const Tensor<Q>& c) {
2351         typedef TENSOR_RESULT_TYPE(T,Q) resultT;
2352         TENSOR_ASSERT(c.ndim() == 2,"second argument must be a matrix",c.ndim(),&c);
2353         if (c.dim(0)==c.dim(1) && t.iscontiguous() && c.iscontiguous()) {
2354             Tensor<resultT> result(t.ndim(),t.dims(),false);
2355             Tensor<resultT> work(t.ndim(),t.dims(),false);
2356             return fast_transform(t, c, result, work);
2357         }
2358         else {
2359             Tensor<resultT> result = t;
2360             for (long i=0; i<t.ndim(); ++i) {
2361                 result = inner(result,c,0,0);
2362             }
2363             return result;
2364         }
2365     }
2366 
2367     /// Transform all dimensions of the tensor t by distinct matrices c
2368 
2369     /// \ingroup tensor
2370     /// Similar to transform but each dimension is transformed with a
2371     /// distinct matrix.
2372     /// \code
2373     /// result(i,j,k...) <-- sum(i',j', k',...) t(i',j',k',...) c[0](i',i) c[1](j',j) c[2](k',k) ...
2374     /// \endcode
2375     /// The first dimension of the matrices c must match the corresponding
2376     /// dimension of t.
2377     template <class T, class Q>
2378     Tensor<TENSOR_RESULT_TYPE(T,Q)> general_transform(const Tensor<T>& t, const Tensor<Q> c[]) {
2379         typedef TENSOR_RESULT_TYPE(T,Q) resultT;
2380         Tensor<resultT> result = t;
2381         for (long i=0; i<t.ndim(); ++i) {
2382             result = inner(result,c[i],0,0);
2383         }
2384         return result;
2385     }
2386 
2387     /// Restricted but heavily optimized form of transform()
2388 
2389     /// \ingroup tensor
2390     /// Both dimensions of \c c must be the same and match all dimensions
2391     /// of the input tensor \c t.  All tensors must be contiguous.
2392     ///
2393     /// Performs the same operation as \c transform but it requires
2394     /// that the caller pass in workspace and a preallocated result,
    /// hoping that both can be reused.  If the result and
2396     /// workspace are reused between calls, then no tensor
2397     /// constructors need be called and cache locality should be
2398     /// improved.  By passing in the workspace, this routine is kept
2399     /// thread safe.
2400     ///
2401     /// The input, result and workspace tensors must be distinct.
2402     ///
2403     /// All input tensors must be contiguous and fastest execution
    /// will result if all dimensions are appropriately aligned and
2405     /// multiples of the underlying vector length.  The workspace and
2406     /// the result must be of the same size as the input \c t .  The
2407     /// result tensor need not be initialized before calling
2408     /// fast_transform.
2409     ///
2410     /// \code
2411     ///     result(i,j,k,...) <-- sum(i',j', k',...) t(i',j',k',...)  c(i',i) c(j',j) c(k',k) ...
2412     /// \endcode
2413     ///
    /// The input dimensions of \c t must all be the same.
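    ///
    /// A sketch of the intended reuse pattern (mirrors what \c transform()
    /// does internally for contiguous inputs):
    /// \code
    /// Tensor<double> result(t.ndim(), t.dims(), false);
    /// Tensor<double> work(t.ndim(), t.dims(), false);
    /// fast_transform(t, c, result, work); // reuse result & work across calls
    /// \endcode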
2415     template <class T, class Q>
2416     Tensor< TENSOR_RESULT_TYPE(T,Q) >& fast_transform(const Tensor<T>& t, const Tensor<Q>& c,  Tensor< TENSOR_RESULT_TYPE(T,Q) >& result,
2417             Tensor< TENSOR_RESULT_TYPE(T,Q) >& workspace) {
2418         typedef  TENSOR_RESULT_TYPE(T,Q) resultT;
2419         const Q *pc=c.ptr();
2420         resultT *t0=workspace.ptr(), *t1=result.ptr();
2421         if (t.ndim()&1) {
2422             t0 = result.ptr();
2423             t1 = workspace.ptr();
2424         }
2425 
2426         long dimj = c.dim(1);
2427         long dimi = 1;
2428         for (int n=1; n<t.ndim(); ++n) dimi *= dimj;
2429 
2430 #if HAVE_IBMBGQ
2431         long nij = dimi*dimj;
2432         if (IS_UNALIGNED(pc) || IS_UNALIGNED(t0) || IS_UNALIGNED(t1)) {
2433             for (long i=0; i<nij; ++i) t0[i] = 0.0;
2434             mTxm(dimi, dimj, dimj, t0, t.ptr(), pc);
2435             for (int n=1; n<t.ndim(); ++n) {
2436                 for (long i=0; i<nij; ++i) t1[i] = 0.0;
2437                 mTxm(dimi, dimj, dimj, t1, t0, pc);
2438                 std::swap(t0,t1);
2439             }
2440         }
2441         else {
2442             mTxmq_padding(dimi, dimj, dimj, dimj, t0, t.ptr(), pc);
2443             for (int n=1; n<t.ndim(); ++n) {
2444                 mTxmq_padding(dimi, dimj, dimj, dimj, t1, t0, pc);
2445                 std::swap(t0,t1);
2446             }
2447         }
2448 #else
2449         // Now assume no restriction on the use of mtxmq
2450         mTxmq(dimi, dimj, dimj, t0, t.ptr(), pc);
2451         for (int n=1; n<t.ndim(); ++n) {
2452             mTxmq(dimi, dimj, dimj, t1, t0, pc);
2453             std::swap(t0,t1);
2454         }
2455 #endif
2456 
2457         return result;
2458     }
2459 
2460     /// Return a new tensor holding the absolute value of each element of t
2461 
2462     /// \ingroup tensor
2463     template <class T>
2464     Tensor< typename Tensor<T>::scalar_type > abs(const Tensor<T>& t) {
2465         typedef typename Tensor<T>::scalar_type scalar_type;
2466         Tensor<scalar_type> result(t.ndim(),t.dims(),false);
2467         BINARY_OPTIMIZED_ITERATOR(scalar_type,result,const T,t,*_p0 = std::abs(*_p1));
2468         return result;
2469     }
2470 
2471     /// Return a new tensor holding the argument of each element of t (complex types only)
2472 
2473     /// \ingroup tensor
2474     template <class T>
2475     Tensor< typename Tensor<T>::scalar_type > arg(const Tensor<T>& t) {
2476         typedef typename Tensor<T>::scalar_type scalar_type;
2477         Tensor<scalar_type> result(t.ndim(),t.dims(),false);
2478         BINARY_OPTIMIZED_ITERATOR(scalar_type,result,T,t,*_p0 = std::arg(*_p1));
2479         return result;
2480     }
2481 
2482     /// Return a new tensor holding the real part of each element of t (complex types only)
2483 
2484     /// \ingroup tensor
2485     template <class T>
2486     Tensor< typename Tensor<T>::scalar_type > real(const Tensor<T>& t) {
2487         typedef typename Tensor<T>::scalar_type scalar_type;
2488         Tensor<scalar_type> result(t.ndim(),t.dims(),false);
2489         BINARY_OPTIMIZED_ITERATOR(scalar_type,result,const T,t,*_p0 = std::real(*_p1));
2490         return result;
2491     }
2492 
2493     /// Return a new tensor holding the imaginary part of each element of t (complex types only)
2494 
2495     /// \ingroup tensor
2496     template <class T>
2497     Tensor< typename Tensor<T>::scalar_type > imag(const Tensor<T>& t) {
2498         typedef typename Tensor<T>::scalar_type scalar_type;
2499         Tensor<scalar_type> result(t.ndim(),t.dims(),false);
2500         BINARY_OPTIMIZED_ITERATOR(scalar_type,result,const T,t,*_p0 = std::imag(*_p1));
2501         return result;
2502     }
2503 
2504     /// Returns a new deep copy of the complex conjugate of the input tensor (complex types only)
2505 
2506     /// \ingroup tensor
2507     template <class T>
2508     Tensor<T> conj(const Tensor<T>& t) {
2509         Tensor<T> result(t.ndim(),t.dims(),false);
2510         BINARY_OPTIMIZED_ITERATOR(T,result,const T,t,*_p0 = std::conj(*_p1));
2511         return result;
2512     }
2513 }
2514 
2515 #undef TENSOR_SHARED_PTR
2516 
2517 #endif // MADNESS_TENSOR_TENSOR_H__INCLUDED
2518