1 /*
2   This file is part of MADNESS.
3 
4   Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10 
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   GNU General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20   For more information please contact:
21 
22   Robert J. Harrison
23   Oak Ridge National Laboratory
24   One Bethel Valley Road
25   P.O. Box 2008, MS-6367
26 
27   email: harrisonrj@ornl.gov
28   tel:   865-241-3937
29   fax:   865-572-0680
30 
31   $Id: srconf.h 2816 2012-03-23 14:59:52Z 3ru6ruWu $
32 */
33 
34 #ifndef TENSORTRAIN_H_
35 #define TENSORTRAIN_H_
36 
37 #include <madness/tensor/tensor.h>
38 #include <madness/tensor/srconf.h>
39 #include <madness/tensor/clapack.h>
40 #include <madness/tensor/tensor_lapack.h>
41 #include <madness/fortran_ctypes.h>
42 
43 
44 /**
45   \file tensortrain.h
46   \brief Defines and implements the tensor train decomposition as described in
47          I.V. Oseledets, Siam J. Sci. Comput. 33, 2295 (2011).
48   \ingroup tensor
49   \addtogroup tensor
50 
51 */
52 
53 
54 namespace madness {
55 
56     /// decompose the input tensor A into U and V, skipping singular vectors of
57     /// small singular values. One deep copy/allocation for U is performed
58 
59     /// @param[inout]   A the input matrix, on exit the matrix VT (right sing. vectors)
60     /// @param[out]     U contiguous new tensor holding the left sing. vectors
61     /// @param[in]      thresh threshold for truncation of the singular vectors
62     /// @param[in]      s   scratch tensor for the singular values, dimension min(n,m)
63     /// @param[in]      scr scratch tensor
64     /// the dimension of the scratch tensor may be computed as
65     /// long lwork=std::max(3*std::min(m,n)+std::max(m,n),5*std::min(m,n)) + n*m;
66     template<typename T>
rank_revealing_decompose(Tensor<T> & A,Tensor<T> & U,const double thresh,Tensor<typename Tensor<T>::scalar_type> & s,Tensor<T> & scr)67     long rank_revealing_decompose(Tensor<T>& A, Tensor<T>& U,
68             const double thresh, Tensor< typename Tensor<T>::scalar_type > & s,
69             Tensor<T>& scr) {
70 
71         MADNESS_ASSERT(A.ndim()==2);    // must be a matrix
72         const long n=A.dim(0);
73         const long m=A.dim(1);
74         const long rmax=std::min(n,m);
75         long lwork=std::max(3*std::min(m,n)+std::max(m,n),5*std::min(m,n));
76 
77         // set up contiguous scratch arrays
78         MADNESS_ASSERT(scr.size()>lwork+n*m);
79         scr=scr.flat();
80         Tensor<T> work=scr(Slice(0,lwork-1));
81         Tensor<T> utmp=scr(Slice(lwork,lwork+n*m-1));
82         Tensor<T> dummy;    // real dummy
83 
84         svd_result(A,utmp,s,dummy,work);
85 
86         // this is rank_right
87         const long R1=SRConf<T>::max_sigma(thresh,rmax,s)+1;
88 
89         // skip if rank=0
90         if (R1>0) {
91 
92             U=madness::copy((utmp(Slice(0,n*rmax-1)))
93                     .reshape(n,rmax)(_,Slice(0,R1-1)));
94 
95             A=A(Slice(0,R1-1),_);
96 
97             // continue with the next dimension
98             for (int j=0; j<m; ++j) {
99                 for (int i=0; i<R1; ++i) {
100                     A(i,j)*=s(i);
101                 }
102             }
103         } else {
104             U=Tensor<T>(n,0l);
105         }
106         return R1;
107     }
108 
109 
110 
111 	/**
112 	 * A tensor train is a multi-modal representation of a tensor t
113 	 * \code
114 	 *  t(i,j,k,l) = \sum G^1_{a1,i,a2} G^2_{a2,j,a3} G^3_{a3,k,a4} G^4_{a4,l,a5}
115 	 * \endcode
116 	 * The "core" tensors G are connected via a linear index network, where the
117 	 * first index a1 and the last index a5 are boundary indices and are set to 1.
118 	 *
119 	 * The tensor train representation is suited for any number of dimensions and
120 	 * in general at least as fast as the 2-way decomposition SVD. If the tensor
121 	 * has full rank it will need about twice the storage space of the full tensor
122 	 */
123 	template<typename T>
124 	class TensorTrain {
125 
126 	    // make all types of TensorTrain friends of each other (for type conversion)
127 	    template<typename Q>
128 	    friend class TensorTrain;
129 
130         /// C++ typename of this tensor.
131         typedef T type;
132 
133         /// C++ typename of the real type associated with a complex type.
134         typedef typename TensorTypeData<T>::scalar_type scalar_type;
135 
136         /// C++ typename of the floating point type associated with scalar real type
137         typedef typename TensorTypeData<T>::float_scalar_type float_scalar_type;
138 
139 		/// holding the core tensors of a tensor train
140 		/// the tensors have the shape (k,r0) (r0,k,r1) (r1,k,r2) .. (rn-1,k)
141 		std::vector<Tensor<T> > core;
142 		/// true if rank is zero
143 		bool zero_rank;
144 
145 	public:
146 
147 		/// empty constructor
TensorTrain()148 		TensorTrain() : core(), zero_rank(true) {
149 		}
150 
151 		/// ctor for a TensorTrain, with the tolerance eps
152 
153 		/// The tensor train will represent the input tensor with
154 		/// accuracy || t - this ||_2 < eps
155 		///
156 		/// Note that we rely on specific layout of the memory in the tensors, e.g.
157 		/// we pass SliceTensors on to lapack. This will only work if the slices are
158 		/// contiguous.
159 		///
160 		/// @param[in]	t	full representation of a tensor
161 		/// @param[in]	eps	the accuracy threshold
TensorTrain(const Tensor<T> & t,double eps)162 		TensorTrain(const Tensor<T>& t, double eps)
163 			: core(), zero_rank(false) {
164 
165 		    MADNESS_ASSERT(t.size() != 0);
166             MADNESS_ASSERT(t.ndim() != 0);
167 
168             std::vector<long> dims(t.ndim());
169             for (int d=0; d<t.ndim(); ++d) dims[d]=t.dim(d);
170             decompose(t.flat(),eps,dims);
171 
172 		}
173 
174 		/// ctor for a TensorTrain, with the tolerance eps
175 
176 		/// The tensor train will represent the input tensor with
177 		/// accuracy || t - this ||_2 < eps
178 		///
179 		/// Note that we rely on specific layout of the memory in the tensors, e.g.
180 		/// we pass SliceTensors on to lapack. This will only work if the slices are
181 		/// contiguous.
182 		///
183 		/// @param[in]	t		full representation of a tensor
184 		/// @param[in]	eps		the accuracy threshold
185 		/// @param[in]	dims	the tt structure
TensorTrain(const Tensor<T> & t,double eps,const std::vector<long> dims)186 		TensorTrain(const Tensor<T>& t, double eps, const std::vector<long> dims)
187 			: core(), zero_rank(false) {
188 
189 		    MADNESS_ASSERT(t.size() != 0);
190             MADNESS_ASSERT(t.ndim() != 0);
191             decompose(t,eps,dims);
192 		}
193 
194 		/// ctor for a TensorTrain, set up only the dimensions, no data
TensorTrain(const std::vector<long> & dims)195 		TensorTrain(const std::vector<long>& dims) {
196             zero_rank = true;
197 
198             core.resize(dims.size());
199             // first and last core tensor
200             core[0] = Tensor<T>(dims[0],long(0));
201             core[dims.size()-1] = Tensor<T>(long(0),dims[dims.size()-1]);
202 
203             // iterate through the rest -- fast forward
204             for (int d=1; d<dims.size()-1; ++d) {
205                 core[d] = Tensor<T>(long(0),dims[d],long(0));
206 		    }
207 
208 		}
209 
210 
211         /// ctor for a TensorTrain, with core tensors explicitly given
212 
213         /// the core tensors must have the shape (k1,r1) (r1,k2,r2) .. (r2,k3)
214 		/// @param[in]  core    vector of core tensors, properly shaped
TensorTrain(const std::vector<Tensor<T>> & c)215         TensorTrain(const std::vector<Tensor<T> >& c) : core(c) {
216             zero_rank=false;
217 
218             // check for zero ranks
219             for (int d=1; d<core.size(); ++d) if (core[d].dim(0)==0) zero_me();
220         }
221 
222 
223 		/// copy constructor, shallow
TensorTrain(const TensorTrain & other)224 		TensorTrain(const TensorTrain& other) : core(other.core),
225 		        zero_rank(other.zero_rank) {
226 		}
227 
228 		/// assigment operator
229 		TensorTrain& operator=(const TensorTrain& other) {
230 		    if (this!=&other) {
231                 zero_rank=other.zero_rank;
232                 core=other.core;
233 		    }
234             return *this;
235 		}
236 
237         /// Type conversion makes a deep copy
238         template <class Q> operator TensorTrain<Q>() const { // type conv => deep copy
239 
240             TensorTrain<Q> result(this->dims());
241             result.zero_rank=zero_rank;
242             for (const Tensor<T>& c : core) {
243                 result.core.push_back(Tensor<Q>(c));
244             }
245             return result;
246         }
247 
248         /// serialize this
249         template <typename Archive>
serialize(Archive & ar)250         void serialize(Archive& ar) {
251             long dim=ndim();
252             ar & zero_rank & dim;
253 
254             // no empty tensor
255             if (dim>0) {
256 
257                 ar & core;
258 
259                 // need this because tensor serialization does not preserve the
260                 // dimensions if the tensor is empty, but we need to!
261 
262                 // existing tensor filled with zeros
263                 if (zero_rank) {
264                     std::vector<long> d=this->dims();
265                     ar & d;
266                     zero_me(d);
267                 }
268             }
269         }
270 
271 
272 		/// deep copy of the whole tensor
273 
274 		/// if argument has zero rank return a zero-rank tensor of the same dimensions
copy(const TensorTrain & other)275 		friend TensorTrain copy(const TensorTrain& other) {
276 
277 		    // fast return
278 		    if (other.zero_rank) return TensorTrain(other.dims());
279 
280             TensorTrain result;
281             for (const Tensor<T>& t: other.core) {
282                 if (t.size()==0) return TensorTrain(other.dims());  // also zero
283                 result.core.push_back(madness::copy(t));
284             }
285             result.zero_rank=other.zero_rank;
286             return result;
287 		}
288 
289 		/// deep copy of a slice of the tensor
290 
291 		/// this operation does not change the ranks, i.e. the resulting
292 		/// tensor is most likely not in an optimal compression state
293 		/// @param[in]  other   tensor to be sliced
294 		/// @param[in]  s       vector of slices
copy(const TensorTrain & other,const std::vector<Slice> & s)295 		friend TensorTrain copy(const TensorTrain& other, const std::vector<Slice>& s) {
296 
297 		    MADNESS_ASSERT(other.ndim()==s.size());
298 		    if (other.zero_rank) {
299 		        std::vector<long> dims(s.size());
300 		        for (int i=0; i<dims.size(); ++i) dims[i]=s[i].end-s[i].start+1;
301 		        return TensorTrain(dims);
302 		    }
303 
304 		    TensorTrain result;
305 		    const long nd=other.ndim();
306 		    result.zero_rank=other.zero_rank;
307 		    result.core.resize(nd);
308 
309 		    // special treatment for first and last core tensor
310 		    // slice dim only, keep ranks
311 		    result.core[0]=copy(other.core[0](s[0],_));
312 		    for (long i=1; i<nd-1; ++i) {
313 		        result.core[i]=copy(other.core[i](_,s[i],_));
314 		    }
315 
316 		    if (other.core.size()>1) result.core[nd-1]=copy(other.core[nd-1](_,s[nd-1]));
317 		    return result;
318 		}
319 
320 		/// decompose the input tensor into a TT representation
321 
322 		/// @param[in]	t		tensor in full rank
323 		/// @param[in]	eps		the precision threshold
324 		/// @param[in]	dims	the tt structure
decompose(const Tensor<T> & t,double eps,const std::vector<long> & dims)325 		void decompose(const Tensor<T>& t, double eps,
326 				const std::vector<long>& dims) {
327 
328 			core.resize(dims.size());
329 			eps=eps/sqrt(dims.size()-1);	// error is relative
330 
331 			// the maximum rank is the smaller one of the ranks unfolded from the
332 			// left and the right.
333 			int rmax1=1;
334 			int rmax2=1;
335 			long n1=1, n2=1;
336 			long lwork=0;
337 			for (int i=0, j=dims.size()-1; i<=j; ++i, --j) {
338 				rmax1*=dims[i];
339 				rmax2*=dims[j];
340 
341 				// work array for dgesvd
342 				n1*=dims[i];
343 				long m1=t.size()/dims[i];
344 				n2*=dims[j];
345 				long m2=t.size()/dims[j];
346 				long max1=std::max(n1,m1);
347 				long max2=std::max(n2,m2);
348 				long min1=std::min(n1,m1);
349 				long min2=std::min(n2,m2);
350 
351 				lwork=std::max(lwork,std::max(3*min1+max1,5*min1));
352 				lwork=std::max(lwork,std::max(3*min2+max2,5*min2));
353 			}
354 			const int rmax=std::min(rmax1,rmax2);
355 
356 			// these are max dimensions, so we can avoid frequent reallocation
357 			Tensor<T> u(rmax1*rmax2);
358 			Tensor<T> dummy;
359 			Tensor< typename Tensor<T>::scalar_type > s(rmax);
360 
361 			// the dimension of the remainder tensor; will be cut down in each iteration
362 			long vtdim=t.size();
363 
364 			// c will be destroyed, and assignment is only shallow, so need to deep copy
365 			Tensor<T> c=madness::copy(t);
366 
367 			// work array for dgesvd
368 			Tensor<T> work(lwork);
369 
370 
371 			// this keeps track of the ranks
372 			std::vector<long> r(dims.size()+1,0l);
373 			r[0] = r[dims.size()] = 1;
374 
375 			for (std::size_t d=1; d<dims.size(); ++d) {
376 
377 				// the core tensors will have dimensions c(rank_left,k,rank_right)
378 				// or for lapack's purposes c(d1,rank_right)
379 				const long k=dims[d-1];
380 				const long d1=r[d-1]*k;
381 				c=c.reshape(d1,c.size()/d1);
382 				const long rmax=std::min(c.dim(0),c.dim(1));
383 				vtdim=vtdim/k;
384 
385 				// testing
386 #if 0
387 				// c will be destroyed upon return
388 				Tensor<T> aa=copy(c);
389 #endif
390 				// The svd routine assumes lda=a etc. Pass in a flat tensor and reshape
391 				// and slice it after processing.
392 				u=u.flat();
393 				svd_result(c,u,s,dummy,work);
394 
395 				// this is rank_right
396 				r[d]=SRConf<T>::max_sigma(eps,rmax,s)+1;
397 				const long rank=r[d];
398 
399 				// this is for testing
400 #if 0
401 				if (0) {
402 					Tensor<T> uu=(u(Slice(0,c.dim(0)*rmax-1))).reshape(c.dim(0),rmax);
403 					Tensor<T> vtt=copy(c(Slice(0,rmax-1),_));
404 					Tensor<T> b(d1,vtdim);
405 					for (long i=0; i<c.dim(0); ++i)
406 						for (long j=0; j<c.dim(1); ++j)
407 							for (long k=0; k<rmax; ++k)
408 								b(i,j) += uu(i,k) * T(s(k)) * vtt(k,j);
409 					b -= aa;
410 					print("b.conforms c",b.conforms(c));
411 					print("early error:",b.absmax());
412 				}
413 #endif
414 
415 				//        U = Tensor<T>(m,rmax);
416 				//        VT = Tensor<T>(rmax,n);
417 
418 				// handle rank=0 explicitly
419 				if (r[d]) {
420 
421 					// done with this dimension -- slice and deep-copy
422 					core[d-1]=madness::copy((u(Slice(0,c.dim(0)*rmax-1)))
423 							.reshape(c.dim(0),rmax)(_,Slice(0,rank-1)));
424 					core[d-1]=core[d-1].reshape(r[d-1],k,r[d]);
425 
426 					// continue with the next dimension
427 					c=c(Slice(0,rank-1),_);
428 
429 					for (int i=0; i<rank; ++i) {
430 						for (int j=0; j<c.dim(1); ++j) {
431 							c(i,j)*=s(i);
432 						}
433 					}
434 
435 					if (d == dims.size()-1) core[d]=c;
436 				}
437 				else {
438 					zero_rank = true;
439 					core[d-1] = Tensor<T>(r[d-1],k,long(0));
440 					// iterate through the rest -- fast forward
441 					for(++d; d<dims.size(); ++d) {
442 						const long k=dims[d-1];
443 						core[d-1] = Tensor<T>(long(0),k,long(0));
444 					}
445 					core[dims.size()-1] = Tensor<T>(long(0),dims[dims.size()-1]);
446 				}
447 			}
448 			core[0]=core[0].fusedim(0);
449 
450 
451 		}
452 
453 
454 		/// turn this into an empty tensor with all cores properly shaped
zero_me()455 		void zero_me() {
456 		    *this=TensorTrain<T>(this->dims());
457 		}
458 
459         /// turn this into an empty tensor with all cores properly shaped
zero_me(const std::vector<long> & dim)460         void zero_me(const std::vector<long>& dim) {
461             *this=TensorTrain<T>(dim);
462         }
463 
464         /// assign a number to this tensor
465         TensorTrain<T>& operator=(const T& number) {
466 
467             // tensor will have ranks = 1 all over
468             core[0]=Tensor<T>(dim(0),1);
469             for (int i=1; i<ndim()-1; ++i) core[i]=Tensor<T>(1,dim(i),1);
470             if (ndim()>1) core[ndim()-1]=Tensor<T>(1,dim(ndim()-1));
471 
472             core[0]=number;
473             for (int i=1; i<ndim(); ++i) core[i]=1.0;
474 
475             zero_rank=false;
476             return *this;
477         }
478 
479 		/// return this multiplied by a scalar
480 
481 		/// @return new tensor
482 		TensorTrain<T> operator*(const T& factor) const {
483 		    TensorTrain result=copy(*this);
484 		    result.scale(factor);
485 		    return result;
486 		}
487 
488 		/// inplace addition of two Tensortrains; will increase ranks of this
489 
490 		/// inefficient if many additions are performed, since it requires
491 		/// many calls of new.
492 		/// @param[in]	rhs	a TensorTrain to be added
493 		TensorTrain<T>& operator+=(const TensorTrain<T>& rhs) {
494 		    gaxpy(1.0,rhs,1.0);
495 		    return *this;
496 		}
497 
498         /// inplace subtraction of two Tensortrains; will increase ranks of this
499 
500         /// inefficient if many subtractions are performed, since it requires
501         /// many calls of new.
502         /// @param[in]  rhs a TensorTrain to be added
503         TensorTrain<T>& operator-=(const TensorTrain<T>& rhs) {
504             gaxpy(1.0,rhs,-1.0);
505             return *this;
506         }
507 
508 		/// Inplace generalized saxpy ... this = this*alpha + other*beta
gaxpy(T alpha,const TensorTrain<T> & rhs,T beta)509         TensorTrain<T>& gaxpy(T alpha, const TensorTrain<T>& rhs, T beta) {
510 
511 			// make sure dimensions conform
512 			MADNESS_ASSERT(this->ndim()==rhs.ndim());
513 
514 			if (this->zero_rank or (alpha==0.0)) {
515 				*this=rhs*beta;
516 			} else if (rhs.zero_rank or (beta==0.0)) {
517 				scale(alpha);
518 			} else if (ndim()==1) {     // simple algorithm for ndim=1
519 			    core[0].gaxpy(alpha,rhs.core[0],beta);
520 			} else {
521 
522 				// special treatment for first border cores (k,r1)
523 
524 			    // alpha and beta are only included in the first core(!)
525 				{
526 					long k=core[0].dim(0);
527 					long r1_this=core[0].dim(1);
528 					long r1_rhs=rhs.core[0].dim(1);
529 					Tensor<T> core_new(k,r1_this+r1_rhs);
530 					core_new(_,Slice(0,r1_this-1))=alpha*core[0];
531 					core_new(_,Slice(r1_this,r1_this+r1_rhs-1))=beta*rhs.core[0];
532 					core[0]=core_new;
533 				}
534 
535 				// interior cores (r0,k,r1)
536 				for (std::size_t i=1; i<core.size()-1; ++i) {
537 					MADNESS_ASSERT(core[i].ndim()==3 or i==core.size()-1);
538 					long r0_this=core[i].dim(0);
539 					long r0_rhs=rhs.core[i].dim(0);
540 					long k=core[i].dim(1);
541 					long r1_this=core[i].dim(2);
542 					long r1_rhs=rhs.core[i].dim(2);
543 					Tensor<T> core_new(r0_this+r0_rhs,k,r1_this+r1_rhs);
544 					core_new(Slice(0,r0_this-1),_,Slice(0,r1_this-1))=core[i];
545 					core_new(Slice(r0_this,r0_this+r0_rhs-1),_,Slice(r1_this,r1_this+r1_rhs-1))=rhs.core[i];
546 					core[i]=core_new;
547 				}
548 
549 				// special treatment for last border core (r0,k)
550 				{
551 					std::size_t d=core.size()-1;
552 					long r0_this=core[d].dim(0);
553 					long r0_rhs=rhs.core[d].dim(0);
554 					long k=core[d].dim(1);
555 					Tensor<T> core_new(r0_this+r0_rhs,k);
556 					core_new(Slice(0,r0_this-1),_)=core[d];
557 					core_new(Slice(r0_this,r0_this+r0_rhs-1),_)=rhs.core[d];
558 					core[d]=core_new;
559 				}
560 			}
561 			if (not verify()) MADNESS_EXCEPTION("ranks in TensorTrain inconsistent",1);
562 			return *this;
563 		}
564 
565         /// Inplace generalized saxpy with slices and without alpha
566 
567         /// return this = this(s1) + other(s2) * beta
gaxpy(const std::vector<Slice> & s1,const TensorTrain<T> & rhs,T beta,const std::vector<Slice> & s2)568         TensorTrain<T>& gaxpy(const std::vector<Slice>& s1,
569                 const TensorTrain<T>& rhs, T beta, const std::vector<Slice>& s2) {
570 
571             // make sure dimensions conform
572             MADNESS_ASSERT(this->ndim()==rhs.ndim());
573 
574 //            if (this->zero_rank) {
575 //                *this=rhs*beta;
576 
577 //            } else {
578             if (true) {
579                 // special treatment for first border cores (k,r1)
580 
581                 // alpha and beta are only included in the first core(!)
582                 {
583                     long k=core[0].dim(0);  // this is max dimension: k>=slice1; k>=slice2
584                     long r1_this=core[0].dim(1);
585                     long r1_rhs=rhs.core[0].dim(1);
586 
587                     Tensor<T> core_new(k,r1_this+r1_rhs);
588                     if (r1_this>0) core_new(_,Slice(0,r1_this-1))=core[0];
589                     if (r1_rhs>0)  core_new(s1[0],Slice(r1_this,r1_this+r1_rhs-1))=beta*rhs.core[0](s2[0],_);
590                     core[0]=core_new;
591                 }
592 
593                 // interior cores (r0,k,r1)
594                 for (std::size_t i=1; i<core.size()-1; ++i) {
595                     MADNESS_ASSERT(core[i].ndim()==3 or i==core.size()-1);
596                     long r0_this=core[i].dim(0);
597                     long r0_rhs=rhs.core[i].dim(0);
598                     long k=core[i].dim(1);
599                     long r1_this=core[i].dim(2);
600                     long r1_rhs=rhs.core[i].dim(2);
601                     Tensor<T> core_new(r0_this+r0_rhs,k,r1_this+r1_rhs);
602                     if (r1_this>0) core_new(Slice(0,r0_this-1),_,Slice(0,r1_this-1))=core[i];
603                     if (r1_rhs>0)  core_new(Slice(r0_this,r0_this+r0_rhs-1),s1[i],Slice(r1_this,r1_this+r1_rhs-1))=rhs.core[i](_,s2[i],_);
604                     core[i]=core_new;
605                 }
606 
607                 // special treatment for last border core (r0,k)
608                 {
609                     std::size_t d=core.size()-1;
610                     long r0_this=core[d].dim(0);
611                     long r0_rhs=rhs.core[d].dim(0);
612                     long k=core[d].dim(1);
613                     Tensor<T> core_new(r0_this+r0_rhs,k);
614                     if (r0_this>0) core_new(Slice(0,r0_this-1),_)=core[d];
615                     if (r0_rhs>0)  core_new(Slice(r0_this,r0_this+r0_rhs-1),s1[d])=rhs.core[d](_,s2[d]);
616                     core[d]=core_new;
617                 }
618             }
619             if (not rhs.zero_rank) zero_rank=false;
620             if (not verify()) MADNESS_EXCEPTION("ranks in TensorTrain inconsistent",1);
621             return *this;
622         }
623 
624         /// compute the Hadamard product of two TensorTrains
625 
626         /// see Sec. 4.2 of the TT paper
627         /// @return *this with the ranks of the input tensors multiplied!
emul(const TensorTrain<T> & other)628         TensorTrain<T>& emul(const TensorTrain<T>& other) {
629 
630             // consistency checks
631             MADNESS_ASSERT(ndim()==other.ndim());
632             for (int i=0; i<ndim(); ++i) MADNESS_ASSERT(dim(i)==other.dim(i));
633 
634             // set up the result tensor
635             std::vector<long> cranks(ndim()-1);       // ranks of result C
636             for (int i=0; i<ndim()-1; ++i) cranks[i]=ranks(i)*other.ranks(i);
637 
638             // check for large sizes
639             for (int i=0; i<ndim()-2; ++i) {
640                 long size=cranks[i]*cranks[i+1]*dim(i);
641                 if (size>10000000)
642                     MADNESS_EXCEPTION("emul in TensorTrain too large -- use full rank tenspr",1);
643             }
644             TensorTrain result(dims());
645 
646             // fast return for zero ranks
647             if (zero_rank or other.zero_rank) {
648                 ;
649             } else if (ndim()==1) {
650                 // fast return for one core only (all ranks equal 1)
651                 result.core[0]=copy(core[0]);
652                 result.core[0].emul(other.core[0]);
653 
654             } else {
655                 result.zero_rank=false;
656                 // compute outer product for each core for each k
657 
658                 // special treatment for first core
659                 result.core[0]=Tensor<T>(dim(0),cranks[0]);
660                 for (int k=0; k<dim(0); ++k) {
661                     Tensor<T> a1=core[0](Slice(k,k),_);
662                     Tensor<T> b1=other.core[0](Slice(k,k),_);
663                     result.core[0](Slice(k,k),_)=outer(a1,b1).reshape(1,cranks[0]);
664                     // before reshape rhs has dimensions (1, r1, 1, r1')
665                     // after reshape rhs has dimensions (1, r1*r1')
666                 }
667 
668                 for (int i=1; i<ndim()-1; ++i) {
669                     result.core[i]=Tensor<T>(cranks[i-1],dim(i),cranks[i]);
670                     for (int k=0; k<dim(i); ++k) {
671                         Tensor<T> a1=core[i](_,Slice(k,k),_).fusedim(1);    // (r1,r2)
672                         Tensor<T> b1=other.core[i](_,Slice(k,k),_).fusedim(1);  // (r1',r2')
673                         Tensor<T> tmp=copy(outer(a1,b1).swapdim(1,2));  // make contiguous
674                         result.core[i](_,Slice(k,k),_)=tmp.reshape(cranks[i-1],1,cranks[i]);
675                         // before swap/fuse/splitdim rhs has dimensions (r1, r2, r1', r2')
676                         // after fusedim/splitdim rhs has dimensions (r1*r1', 1, r2*r2')
677                     }
678                 }
679 
680                 // special treatment for last core
681                 const long n=ndim()-1;
682                 result.core[n]=Tensor<T>(cranks[n-1],dim(n));
683                 for (int k=0; k<dim(n); ++k) {
684                     Tensor<T> a1=core[n](_,Slice(k,k));
685                     Tensor<T> b1=other.core[n](_,Slice(k,k));
686                     result.core[n](_,Slice(k,k))=outer(a1,b1).reshape(cranks[n-1],1);
687                     // before reshape rhs has dimensions (r1,1, r1',1)
688                     // after reshape rhs has dimensions (r1*r1', 1)
689 
690                 }
691             }
692             *this=result;
693             return *this;
694         }
695 
696 
697 
698 		/// merge two dimensions into one
699 
700 		/// merge dimension i and i+1 into new dimension i
701 		/// @param[in]	i	the first dimension
fusedim(const long i)702 		void fusedim(const long i) {
703 			// core_new = left * right
704 			// (r1, k1*k2, r3) = sum_r2 (r1, k1, r2) * (r2, k2, r3)
705 
706 			// determine index
707 			const int index=core[i].ndim()-2;	// (r-1, k, k, .. , k, r1)
708 
709 			if (not zero_rank) {
710 			    core[i]=inner(core[i],core[i+1]);
711 	            core[i]=core[i].fusedim(index);
712 			} else {
713 			    if (i==0) { // (k1*k2, r2=0)
714 	                core[i]=Tensor<T>(core[i].dim(0)*core[i+1].dim(1),0l);
715 			    } else {                /// (r1=0, k1*k2, r2=0)
716                     core[i]=Tensor<T>(0l,core[i].dim(0)*core[i+1].dim(1),0l);
717 			    }
718 			}
719 
720 			// shift all subsequent cores and remove the last one
721 			for (std::size_t d=i+1; d<core.size()-1; ++d) core[d]=core[d+1];
722 			core.pop_back();
723 
724 		}
725 
726         /// Returns new view/tensor splitting dimension \c i as \c dimi0*dimi1
727 		/// to produce conforming d+1 dimension tensor
728 
729 		/// @param[in]  idim    the dimension to be split
730 		/// @param[in]  k1      new first dimension of idim
731 		/// @param[in]  k2      new second dimension of idim
732 		/// @param[in]  eps     threshold for SVD (choose negative to keep all terms)
733         /// @return new deep copy of this with split dimensions
splitdim(long idim,long k1,long k2,const double eps)734         TensorTrain<T> splitdim(long idim, long k1, long k2, const double eps) const {
735             // core_new = left * right
736             // (r1, k1*k2, r3) = sum_r2 (r1, k1, r2) * (r2, k2, r3)
737 
738             // check for consistency
739             MADNESS_ASSERT(k1*k2==dim(idim));
740 
741             if (zero_rank) {
742                 std::vector<long> newdims(this->ndim()+1);
743                 for (long i=0; i<idim; ++i) newdims[i]=this->dim(i);
744                 newdims[idim]=k1;
745                 newdims[idim+1]=k2;
746                 for (long i=idim+1; i<ndim(); ++i) newdims[i+1]=dim(i);
747                 return TensorTrain(newdims);
748             }
749 
750             TensorTrain<T> result;
751 
752             long r1= (idim==0) ? 1 : ranks(idim-1);       // left-side rank
753             long r2= (idim==ndim()-1) ? 1 : ranks(idim);  // right-side rank
754             long k12=dim(idim);
755 
756             Tensor<T> A=core[idim].reshape(r1*k1,r2*k2);
757             Tensor<T> U,VT;
758             Tensor< typename Tensor<T>::scalar_type > s(k12);
759             svd(A,U,s,VT);
760 
761             // this is the new interior rank
762             long r=SRConf<T>::max_sigma(eps,std::min(A.dim(0),A.dim(1)),s)+1;
763 
764             // handle rank=0 explicitly
765             if (r==0) {
766                 std::vector<long> newdims(this->ndim()+1);
767                 for (long i=0; i<idim; ++i) newdims[i]=this->dim(i);
768                 newdims[idim]=k1;
769                 newdims[idim+1]=k2;
770                 for (long i=idim+1; i<ndim(); ++i) newdims[i+1]=dim(i);
771                 return TensorTrain(newdims);
772             } else {
773 
774                 // convolve the singular values into the right singular vectors
775                 for (int ii=0; ii<r; ++ii) {
776                     for (int j=0; j<VT.dim(1); ++j) {
777                         VT(ii,j)*=s(ii);
778                     }
779                 }
780 
781                 for (long ii=0; ii<idim; ++ii) result.core.push_back(copy(core[ii]));
782                 result.core.push_back(copy(U(_,Slice(0,r-1))).reshape(r1,k1,r));
783                 result.core.push_back(copy(VT(Slice(0,r-1),_)).reshape(r,k2,r2));
784                 for (long ii=idim+1; ii<ndim(); ++ii) result.core.push_back(core[ii]);
785 
786                 // post-processing
787                 if (result.core.front().ndim()==3) result.core.front()=result.core.front().fusedim(0);
788                 if (result.core.back().ndim()==3) result.core.back()=result.core.back().fusedim(1);
789                 result.zero_rank=false;
790             }
791             return result;
792 
793         }
794 
795 		/// reconstruct this to a full representation
796 
797 		/// @param[in]	flat	return this in flat representation
798 		/// @return	this in full rank representation
799 		Tensor<T> reconstruct(const bool flat=false) const {
800 
801 			if (zero_rank) {
802 				if (flat) {
803 					long size=1;
804 					for (int i=1; i<this->ndim(); ++i) size*=core[i].dim(1);
805 					return Tensor<T>(size);
806 				} else {
807 					std::vector<long> d(this->ndim());
808 					d[0]=core[0].dim(0);	// first core tensor has shape (k,r1)
809 					for (int i=1; i<this->ndim(); ++i) d[i]=core[i].dim(1);
810 					return Tensor<T>(d);
811 				}
812 			}
813 
814 			Tensor<T> result=core.front();
815 			typename std::vector<Tensor<T> >::const_iterator it;
816 
817 			for (it=++core.begin(); it!=core.end(); ++it) {
818 				result=inner(result,*it);
819 				if (flat) result=result.fusedim(0);
820 			}
821 			return result;
822 		}
823 
824 		/// construct a two-mode representation (aka unnormalized SVD)
825 
826 		/// @param[out] U The left singular vectors, ({i},rank)
827 		/// @param[out] VT The right singular vectors, (rank,{i})
828 		/// @param[out]	s Vector holding 1's, (rank)
two_mode_representation(Tensor<T> & U,Tensor<T> & VT,Tensor<typename Tensor<T>::scalar_type> & s)829 		void two_mode_representation(Tensor<T>& U, Tensor<T>& VT,
830 		        Tensor< typename Tensor<T>::scalar_type >& s) {
831 
832 		    // number of dimensions needs to be even
833 		    MADNESS_ASSERT(ndim()%2==0);
834 
835 		    if (not zero_rank) {
836 		        typename std::vector<Tensor<T> >::const_iterator it1, it2;
837 		        U=core.front();
838 		        VT=core.back();
839 		        for (it1=++core.begin(), it2=--(--core.end()); it1<it2; ++it1, --it2) {
840 		            U=inner(U,*it1);
841 		            VT=inner(*it2,VT);
842 		        }
843 		        s=Tensor< typename Tensor<T>::scalar_type >(VT.dim(0));
844 		        s=1.0;
845 		    }
846 		    else {
847 		        if (ndim()==2) {
848                     U = Tensor<T>(dim(0),long(0));
849                     VT = Tensor<T>(long(0),dim(1));
850 		        } else if (ndim()==4) {
851                     U = Tensor<T>(dim(0),dim(1),long(0));
852                     VT = Tensor<T>(long(0),dim(2),dim(3));
853 		        } else if (ndim()==6) {
854                     U = Tensor<T>(dim(0),dim(1),dim(2),long(0));
855                     VT = Tensor<T>(long(0),dim(3),dim(4),dim(5));
856 		        } else {
857 		            MADNESS_EXCEPTION("ndim>6 in tensortrain::two_mode_representation",1);
858 		        }
859 		        s = Tensor< typename Tensor<T>::scalar_type >(0l);
860 		    }
861 		}
862 
863         /// recompress and truncate this TT representation
864 
865         /// this in recompressed TT form with optimal rank
866         /// @param[in]  eps the truncation threshold
867 		template<typename R=T>
868         typename std::enable_if<!std::is_arithmetic<R>::value, void>::type
truncate(double eps)869         truncate(double eps) {
870             MADNESS_EXCEPTION("no complex truncate in TensorTrain",1);
871         }
872 
873 
874 		/// recompress and truncate this TT representation
875 
876 		/// this in recompressed TT form with optimal rank
877 		/// @param[in]	eps	the truncation threshold
878 		template<typename R=T>
879 		typename std::enable_if<std::is_arithmetic<R>::value, void>::type
truncate(double eps)880 		truncate(double eps) {
881 
882 		    // fast return
883 		    if (zero_rank) return;
884 
885 		    for (long i=0; i<core.size()-1; ++i) if (ranks(i)==0) zero_rank=true;
886 
887 		    if (zero_rank) {
888 		        zero_me();
889 		        return;
890 		    }
891 
892 		    std::vector<long> tt_dims=this->dims();
893 
894 		    eps=eps/sqrt(this->ndim());
895 		    if (not verify()) MADNESS_EXCEPTION("ranks in TensorTrain inconsistent",1);
896 
897 		    // right-to-left orthogonalization (line 4)
898 		    // get maximum rank and maximum k-value
899 		    // cores are (k0,r0), (r0,k1,r1), (r1,k2,r2), ... (rd-1,kd)
900 		    long rmax = core[0].dim(1);
901 		    long kmax = core[0].dim(0);
902 		    for(size_t i=1;i<core.size();i++){
903 		        rmax = std::max(rmax,core[i].dim(0));
904 		        kmax = std::max(kmax,core[i].dim(1));
905 		    }
906 
907 		    Tensor<T> L;//L_buffer(rmax,rmax*kmax);
908 		    Tensor<T> lq_tau(rmax);
909 		    long max_rk = std::max(rmax,kmax);
910 		    long lq_work_dim = 2*max_rk+(max_rk+1)*64;
911 		    Tensor<T> lq_work(lq_work_dim);
912 		    Tensor<T> L_buffer(max_rk,max_rk);
913 		    long dimensions[TENSOR_MAXDIM];
914 		    // last tensor differs in dimension (first tensor also, but we dont reach it in the loop)
915 		    if(core.size()>1){
916 		        const long n_dim = core.back().ndim();
917 		        for (int i=0; i<n_dim; ++i) dimensions[i]=core.back().dim(i);
918 
919 		        const long r0 = core.back().dim(0);
920 		        const long r1 = core.back().size()/r0;
921 		        core.back()=core.back().reshape(r0,r1);
922 
923 		        // assignement of L with the L_buffer tensor
924 		        // works only if the bool for lq_result is set to false
925 		        {
926 		            long r_rows= (core.back().dim(1)>=core.back().dim(0)) ? core.back().dim(0) : core.back().dim(1);
927 		            long r_cols=core.back().dim(0);
928 		            L = L_buffer(Slice(0,r_cols-1),Slice(0,r_rows-1));
929 		            L = 0.0;
930 		        }
931 		        lq_result(core.back(),L,lq_tau,lq_work,false);
932 		        //Tensor<T> L = L_buffer(Slice(0,r0-1),Slice(0,r1-1));
933 
934 		        dimensions[0]=std::min(dimensions[0],core.back().dim(0));
935 		        core.back()=core.back().reshape(n_dim,dimensions);
936 		        // multiply to the left (line 6)
937 		        core[core.size()-2]=inner(core[core.size()-2],L);
938 
939 		    }
940 
941 
942 		    for (std::size_t d=core.size()-2; d>0; --d) {
943 
944 		        // save tensor structure
945 		        const long ndim=core[d].ndim();
946 		        for (int i=0; i<ndim; ++i) dimensions[i]=core[d].dim(i);
947 
948 		        // G(r0, k*r1)
949 		        const long r0=core[d].dim(0);
950 		        const long r1=core[d].size()/r0;
951 		        core[d]=core[d].reshape(r0,r1);
952 
953 		        // decompose the core tensor (line 5)
954 		        //lq(core[d],L_buffer);  // might shrink the core
955 
956 		        // assignement of L with the inner_buffer tensor
957 		        // works only if the bool for lq_result is set to false
958 		        {
959 		            long r_rows= (core[d].dim(1)>=core[d].dim(0)) ? core[d].dim(0) : core[d].dim(1);
960 		            long r_cols=core[d].dim(0);
961 		            L = L_buffer(Slice(0,r_cols-1),Slice(0,r_rows-1));
962 		            L = 0.0;
963 		        }
964 
965 		        // workaround for LQ decomposition to avoid reallocations
966 		        //L_buffer = 0.0;
967 		        lq_tau = 0.0;
968 		        lq_work = 0.0;
969 		        lq_result(core[d],L,lq_tau,lq_work,false);
970 		        // slice L to the right size
971 		        //Tensor<T> L = L_buffer(Slice(0,r0-1),Slice(0,r1-1));
972 
973 		        dimensions[0]=std::min(r0,core[d].dim(0));
974 		        core[d]=core[d].reshape(ndim,dimensions);
975 
976 		        // multiply to the left (line 6)
977 		        core[d-1]=inner(core[d-1],L);
978 		    }
979 
980 		    // svd buffer tensor (see svd_results in lapack.cc)
981 		    long m =rmax*kmax;
982 		    long n =rmax;
983 		    long k =std::min<long>(m,n);
984 		    long svd_buffer_size = std::max<long>(3*std::min<long>(m,n)+std::max<long>(m,n),5*std::min<long>(m,n)-4)*32;
985 		    Tensor<T> svd_buffer(svd_buffer_size);
986 		    Tensor<T> U_buffer(m,k);
987 		    Tensor<T> dummy;
988 		    Tensor< typename Tensor<T>::scalar_type > s_buffer(k);
989 
990 		    // left-to-right SVD (line 9)
991 		    for (std::size_t d=0; d<core.size()-1; ++d) {
992 
993 		        // save tensor structure
994 		        const long ndim=core[d].ndim();
995 		        for (int i=0; i<ndim; ++i) dimensions[i]=core[d].dim(i);
996 
997 		        // reshape the core tensor (r0*k, r1)
998 		        long r1=core[d].dim(core[d].ndim()-1);
999 		        //				long r1=core[d].dim(1);
1000 		        core[d]=core[d].reshape(core[d].size()/r1,r1);
1001 
1002 		        Tensor<T> U,VT;
1003 		        long ds=std::min(core[d].dim(0),core[d].dim(1));
1004 		        s_buffer = 0.0;
1005 		        //Tensor< typename Tensor<T>::scalar_type > s(ds)
1006 		        Tensor< typename Tensor<T>::scalar_type > s = s_buffer(Slice(0,ds-1));
1007 
1008 		        // decompose (line 10)
1009 		        //svd(core[d],U,s,VT);
1010 		        // get the dimensions of U and V
1011 		        long du = core[d].dim(0);
1012 		        long dv = core[d].dim(1);
1013 		        U_buffer = 0.0;
1014 		        svd_buffer = 0.0;
1015 		        U_buffer = U_buffer.flat();
1016 		        // VT is written on core[d] input
1017 		        svd_result(core[d],U_buffer,s,dummy,svd_buffer);
1018 
1019 		        // truncate the SVD
1020 		        int r_truncate=SRConf<T>::max_sigma(eps,ds,s)+1;
1021 		        if (r_truncate==0) {
1022 		            zero_me(tt_dims);
1023 		            return;
1024 		        }
1025 
1026 		        // make tensors contiguous
1027 		        U=madness::copy(U_buffer(Slice(0,(du*ds)-1)).reshape(du,ds)(_,Slice(0,r_truncate-1)));
1028 		        VT=madness::copy(core[d](Slice(0,r_truncate-1),Slice(0,dv-1)));
1029 
1030 		        dimensions[ndim-1]=r_truncate;
1031 		        core[d]=U.reshape(ndim,dimensions);
1032 
1033 		        for (int i=0; i<VT.dim(0); ++i) {
1034 		            for (int j=0; j<VT.dim(1); ++j) {
1035 		                VT(i,j)*=s(i);
1036 		            }
1037 		        }
1038 
1039 		        // multiply to the right (line 11)
1040 		        core[d+1]=inner(VT,core[d+1]);
1041 
1042 		    }
1043 
1044 		    if (not verify()) MADNESS_EXCEPTION("ranks in TensorTrain inconsistent",1);
1045 		}
1046 
1047 		/// return the number of dimensions
ndim()1048 		long ndim() const {return core.size();}
1049 
1050 		/// return the number of coefficients in all core tensors
size()1051 		long size() const {
1052 			if (zero_rank) return 0;
1053 			long n=0;
1054 			typename std::vector<Tensor<T> >::const_iterator it;
1055 			for (it=core.begin(); it!=core.end(); ++it) n+=it->size();
1056 			return n;
1057 		}
1058 
1059 		/// return the size of this instance, including static memory for vectors and such
real_size()1060 		long real_size() const {
1061 			long n=this->size()*sizeof(T);
1062 			n+=core.size() * sizeof(Tensor<T>);
1063 			n+=sizeof(*this);
1064 			return n;
1065 		}
1066 
1067 		/// return the number of entries in dimension i
dim(const int i)1068 		long dim(const int i) const {
1069 			if (i==0) return core[0].dim(0);
1070 			return core[i].dim(1);
1071 		}
1072 
1073 		/// return the dimensions of this tensor
dims()1074 		std::vector<long> dims() const {
1075 		    std::vector<long> d(ndim());
1076             d[0]=core[0].dim(0);    // dim,rank
1077 		    for (long i=1; i<ndim(); ++i) d[i]=core[i].dim(1);  // rank,dim,rank
1078 		    return d;
1079 		}
1080 
1081 		/// check that the ranks of all core tensors are consistent
verify()1082 		bool verify() const {
1083 		    if (core.size()<2) return true; // no ranks
1084 		    if (core[0].dim(1)!=core[1].dim(0)) return false;
1085 		    for (int d=2; d<ndim(); ++d) {
1086 		        if (core[d-1].dim(2)!=core[d].dim(0)) return false;
1087 		    }
1088 		    for (const Tensor<T>& c : core) {
1089 		        int size=1;
1090 		        for (int i=0; i<c.ndim(); ++i) size*=c.dim(i);
1091 		        if (size!=c.size()) return false;
1092 		        if (not c.iscontiguous()) return false;
1093 		    }
1094 		    return true;
1095 		}
1096 
1097 		/// if rank is zero
is_zero_rank()1098 		bool is_zero_rank() const {return zero_rank;}
1099 
1100 		/// return the TT ranks
ranks()1101 		std::vector<long> ranks() const {
1102 			if (zero_rank) return std::vector<long>(core.size()-1,0);
1103 			MADNESS_ASSERT(is_tensor());
1104 			std::vector<long> r(core.size()-1);
1105 			for (std::size_t i=0; i<r.size(); ++i) r[i]=core[i+1].dim(0);
1106 			return r;
1107 		}
1108 
1109         /// return the TT ranks for dimension i (to i+1)
ranks(const int i)1110        long ranks(const int i) const {
1111             if (zero_rank) return 0;
1112             if (i<core.size()-1) {
1113                 return core[i].dim(core[i].ndim()-1);
1114             } else {
1115                 print("ndim ",ndim());
1116                 print("i    ",i);
1117                 MADNESS_EXCEPTION("requested invalid rank in TensorTrain",1);
1118                 return 0;
1119             }
1120         }
1121 
1122         /// returns the Frobenius norm
normf()1123         float_scalar_type normf() const {
1124             return sqrt(float_scalar_type(std::abs(trace(*this))));
1125         };
1126 
1127         /// scale this by a number
1128 
1129         /// @param[in]  fac the factor to multiply
1130         /// @return *this * fac
scale(T fac)1131         void scale(T fac) {
1132             if (not zero_rank and (core.size()>0)) core.front().scale(fac);
1133         }
1134 
1135         /// Returns a pointer to the internal data
1136 
1137         /// @param[in]  ivec    index of core vector to which the return values points
1138         T* ptr(const int ivec=0) {
1139             if (core.size()) return core[ivec].ptr();
1140             return 0;
1141         }
1142 
1143         /// Returns a pointer to the internal data
1144 
1145         /// @param[in]  ivec    index of core vector to which the return values points
1146         const T* ptr(const int ivec=0) const {
1147             if (core.size()) return core[ivec].ptr();
1148             return 0;
1149         }
1150 
1151         /// check if this is a tensor (r,k,r)
is_tensor()1152         bool is_tensor() const {
1153             if (ndim()>0) return (core[0].ndim()==2);
1154             return false;
1155         }
1156 
1157         /// check if this is an operator (r,k',k,r)
is_operator()1158         bool is_operator() const {
1159             return (!is_tensor());
1160         }
1161 
1162         /// convert this into a tensor representation (r,k,r)
make_tensor()1163         TensorTrain<T>& make_tensor() {
1164             if (is_tensor()) return *this;
1165 
1166             long nd=this->ndim();
1167             core[0]=core[0].fusedim(0);         // (k,k,r) -> (k,r)
1168             core[nd-1]=core[nd-1].fusedim(1);   // (r,k,k) -> (r,k)
1169             for (int i=1; i<nd-1; ++i) core[i]=core[i].fusedim(1);  // (r,k',k,r) -> (r,k,r)
1170             return *this;
1171 
1172         }
1173 
1174         /// convert this into an operator representation (r,k',k,r)
make_operator()1175         TensorTrain<T>& make_operator() {
1176             if (is_operator()) return *this;
1177 
1178             long nd=this->ndim();
1179 
1180             long k2=core[0].dim(0);
1181             long k0=sqrt(k2);
1182             MADNESS_ASSERT(k0*k0==k2);
1183             MADNESS_ASSERT(core[0].ndim()==2);
1184             core[0]=core[0].splitdim(0,k0,k0);         // (k*k,r) -> (k,k,r)
1185 
1186             k2=core[nd-1].dim(1);
1187             k0=sqrt(k2);
1188             MADNESS_ASSERT(k0*k0==k2);
1189             MADNESS_ASSERT(core[nd-1].ndim()==2);
1190             core[nd-1]=core[nd-1].splitdim(1,k0,k0);   // (r,k*k) -> (r,k,k)
1191 
1192             for (int i=1; i<nd-1; ++i) {
1193                 k2=core[i].dim(1);
1194                 k0=sqrt(k2);
1195                 MADNESS_ASSERT(k0*k0==k2);
1196                 MADNESS_ASSERT(core[i].ndim()==3);
1197                 core[i]=core[i].splitdim(1,k0,k0);  // (r,k*k,r) -> (r,k',k,r)
1198             }
1199             return *this;
1200         }
1201 
1202 
1203         /// reference to the internal core
get_core(const int i)1204         Tensor<T>& get_core(const int i) {
1205             MADNESS_ASSERT(i<ndim());
1206             return core[i];
1207         }
1208 
1209         /// const reference to the internal core
get_core(const int i)1210         const Tensor<T>& get_core(const int i) const {
1211             MADNESS_ASSERT(i<ndim());
1212             return core[i];
1213         }
1214 
1215         /// Return the trace of two tensors, no complex conjugate involved
1216 
1217         /// @return <this | B>
1218         template <class Q>
TENSOR_RESULT_TYPE(T,Q)1219         TENSOR_RESULT_TYPE(T,Q) trace(const TensorTrain<Q>& B) const {
1220             if (TensorTypeData<T>::iscomplex) MADNESS_EXCEPTION("no complex trace in TensorTrain, sorry",1);
1221             if (TensorTypeData<Q>::iscomplex) MADNESS_EXCEPTION("no complex trace in TensorTrain, sorry",1);
1222 
1223             typedef TENSOR_RESULT_TYPE(T,Q) resultT;
1224 
1225             // alias
1226             const TensorTrain<T>& A=*this;
1227 
1228             MADNESS_ASSERT(A.ndim()==B.ndim());   // number of dimensions
1229 
1230             // fast return
1231             if (A.zero_rank or B.zero_rank) return resultT(0.0);
1232             if (A.ndim()==1) {
1233                 return A.core[0].trace(B.core[0]);
1234             }
1235 
1236             // set up temporary tensors for intermediates
1237             long size1=A.ranks(0)*B.ranks(0);                   // first contraction
1238             long size2=B.ranks(ndim()-2)*A.dim(ndim()-1);       // last contraction
1239 
1240             for (int d=1; d<A.ndim()-1; ++d) {
1241                 size1=std::max(size1,A.ranks(d)*B.ranks(d));
1242                 size2=std::max(size2,A.ranks(d)*B.ranks(d-1)*A.dim(d));
1243             }
1244             Tensor<resultT> tmp1(size1), tmp2(size2);       // scratch
1245             Tensor<resultT> Aprime, AB;                     // for flat views of tmp
1246 
1247             // loop over all dimensions but the last one
1248             for (int d=0; d<A.ndim()-1; ++d) {
1249 
1250                 // contract cores to matrix AB(rank(A), rank(B))
1251                 // AB(ra1,rb1) = sum_(r0,i0) A(ra0,i0,ra1) B(rb0,i0,rb1)
1252                 // index dimension is the first one: core((r * i), r)
1253 
1254                 // calculate dimensions
1255                 long rA= (d==0) ? A.core[d].dim(1) : Aprime.dim(2);     //  r_d (A)
1256                 long rB= (d==0) ? B.core[d].dim(1) : B.core[d].dim(2);                           //  r_d (B)
1257                 MADNESS_ASSERT(rA*rB<=size1);
1258                 if (d>0) tmp1(Slice(0,rA*rB-1))=0.0;         // zero out old stuff
1259 
1260 //                Tensor<resultT> AB;
1261                 if (d==0) {
1262 //                    AB=inner(A.core[d],B.core[d],0,0);
1263                     inner_result(A.core[d],B.core[d],0,0,tmp1);
1264                 } else {
1265 //                    AB=inner(Aprime.fusedim(0),B.core[d].fusedim(0),0,0);
1266                     inner_result(Aprime.fusedim(0),B.core[d].fusedim(0),0,0,tmp1);
1267                 }
1268                 AB=tmp1(Slice(0,rA*rB-1)).reshape(rA,rB);
1269 
1270                 // contract temp matrix AB into core of A of dimension i+1
1271                 // into temp core A_i+1
1272                 // Atmp(rank(B1), i(2)) = sum_ rank(A1) ABi(rank(A1),rank(B1)) A.core[1](rank(A1),i(2),rank(A2))
1273 
1274                 // calculate dimensions
1275                 long d1=AB.dim(1);              //  r_d
1276                 long d2=A.core[d+1].dim(1);     //  i_d
1277                 long d3=A.core[d+1].dim(2);     //  r_{d+1}
1278                 MADNESS_ASSERT(d1*d2*d3<=size2);
1279 
1280                 // repeated zero-ing probably much faster than reallocation
1281                 //                Aprime=inner(AB,A.core[d+1],0,0);
1282                 if (d>0) tmp2(Slice(0,d1*d2*d3-1))=0.0;
1283                 inner_result(AB,A.core[d+1],0,0,tmp2);
1284                 Aprime=tmp2(Slice(0,d1*d2*d3-1)).reshape(d1,d2,d3);
1285 
1286             }
1287 
1288             // special treatment for the last dimension
1289             resultT result=Aprime.trace(B.core[ndim()-1]);
1290             return result;
1291         }
1292 
1293 
1294         template <typename R, typename Q>
1295         friend TensorTrain<TENSOR_RESULT_TYPE(R,Q)> transform(
1296                 const TensorTrain<R>& t, const Tensor<Q>& c);
1297 
1298         template <typename R, typename Q>
1299         friend TensorTrain<TENSOR_RESULT_TYPE(R,Q)> general_transform(
1300                 const TensorTrain<R>& t, const Tensor<Q> c[]);
1301 
1302         template <typename R, typename Q>
1303         friend TensorTrain<TENSOR_RESULT_TYPE(R,Q)> transform_dir(
1304                 const TensorTrain<R>& t, const Tensor<Q>& c, const int axis);
1305 
1306         template <typename R, typename Q>
1307         friend TensorTrain<TENSOR_RESULT_TYPE(R,Q)> outer(
1308                 const TensorTrain<R>& t1, const TensorTrain<Q>& t2);
1309 
1310 	};
1311 
1312 
1313 	/// transform each dimension with the same operator matrix
1314 
1315     /// result(i,j,k...) <-- sum(i',j', k',...) t(i',j',k',...) c(i',i) c(j',j) c(k',k) ...
1316     /// TODO: merge this with general_transform
1317 	template <class T, class Q>
transform(const TensorTrain<T> & t,const Tensor<Q> & c)1318     TensorTrain<TENSOR_RESULT_TYPE(T,Q)> transform(const TensorTrain<T>& t,
1319             const Tensor<Q>& c) {
1320 
1321         typedef TENSOR_RESULT_TYPE(T,Q) resultT;
1322 
1323         // fast return if possible
1324         if (t.zero_rank or (t.ndim()==0)) return TensorTrain<resultT>(t.dims());
1325 
1326         const long ndim=t.ndim();
1327 
1328         TensorTrain<resultT> result;
1329         result.zero_rank=false;
1330         result.core.resize(ndim);
1331         // special treatment for first core(i1,r1) and last core (rd-1, id)
1332         result.core[0]=inner(c,t.core[0],0,0);
1333         if (ndim>1) result.core[ndim-1]=inner(t.core[ndim-1],c,1,0);
1334 
1335         // other cores have dimensions core(r1,i2,r2);
1336 
1337         // set up scratch tensor
1338         long size=0;
1339         for (int d=1; d<ndim-1; ++d) size=std::max(size,t.core[d].size());
1340         Tensor<resultT> tmp(size);
1341 
1342         for (int d=1; d<ndim-1; ++d) {
1343             long r1=t.core[d].dim(0);
1344             long i2=t.core[d].dim(1);
1345             long r2=t.core[d].dim(2);
1346 
1347             // zero out old stuff from the scratch tensor
1348             if (d>1) tmp(Slice(0,r1*i2*r2-1))=0.0;
1349             inner_result(t.core[d],c,1,0,tmp);
1350             result.core[d]=copy(tmp(Slice(0,r1*i2*r2-1)).reshape(r1,r2,i2).swapdim(1,2));
1351         }
1352         return result;
1353     }
1354 
1355 
1356     /// Transform all dimensions of the tensor t by distinct matrices c
1357 
1358     /// \ingroup tensor
1359     /// Similar to transform but each dimension is transformed with a
1360     /// distinct matrix.
1361     /// \code
1362     /// result(i,j,k...) <-- sum(i',j', k',...) t(i',j',k',...) c[0](i',i) c[1](j',j) c[2](k',k) ...
1363     /// \endcode
1364     /// The first dimension of the matrices c must match the corresponding
1365     /// dimension of t.
1366     template <class T, class Q>
general_transform(const TensorTrain<T> & t,const Tensor<Q> c[])1367     TensorTrain<TENSOR_RESULT_TYPE(T,Q)> general_transform(const TensorTrain<T>& t,
1368             const Tensor<Q> c[]) {
1369 
1370         typedef TENSOR_RESULT_TYPE(T,Q) resultT;
1371 
1372         // fast return if possible
1373         if (t.zero_rank or (t.ndim()==0)) return TensorTrain<resultT>(t.dims());
1374 
1375         const long ndim=t.ndim();
1376 
1377         TensorTrain<resultT> result;
1378         result.zero_rank=false;
1379         result.core.resize(ndim);
1380         // special treatment for first core(i1,r1) and last core (rd-1, id)
1381         result.core[0]=inner(c[0],t.core[0],0,0);
1382         if (ndim>1) result.core[ndim-1]=inner(t.core[ndim-1],c[ndim-1],1,0);
1383 
1384         // other cores have dimensions core(r1,i2,r2);
1385 
1386         // set up scratch tensor
1387         long size=0;
1388         for (int d=1; d<ndim-1; ++d) size=std::max(size,t.core[d].size());
1389         Tensor<resultT> tmp(size);
1390 
1391         for (int d=1; d<ndim-1; ++d) {
1392             long r1=t.core[d].dim(0);
1393             long i2=t.core[d].dim(1);
1394             long r2=t.core[d].dim(2);
1395 
1396             // zero out old stuff from the scratch tensor
1397             if (d>1) tmp(Slice(0,r1*i2*r2-1))=0.0;
1398             inner_result(t.core[d],c[d],1,0,tmp);
1399             result.core[d]=copy(tmp(Slice(0,r1*i2*r2-1)).reshape(r1,r2,i2).swapdim(1,2));
1400         }
1401         return result;
1402     }
1403 
1404     /// Transforms one dimension of the tensor t by the matrix c, returns new contiguous tensor
1405 
1406     /// \ingroup tensor
1407     /// \code
1408     /// transform_dir(t,c,1) = r(i,j,k,...) = sum(j') t(i,j',k,...) * c(j',j)
1409     /// \endcode
1410     /// @param[in] t Tensor to transform (size of dimension to be transformed must match size of first dimension of \c c )
1411     /// @param[in] c Matrix used for the transformation
1412     /// @param[in] axis Dimension (or axis) to be transformed
1413     /// @result Returns a new tensor train
1414     template <class T, class Q>
transform_dir(const TensorTrain<T> & t,const Tensor<Q> & c,const int axis)1415     TensorTrain<TENSOR_RESULT_TYPE(T,Q)> transform_dir(const TensorTrain<T>& t,
1416             const Tensor<Q>& c, const int axis) {
1417 
1418         typedef TENSOR_RESULT_TYPE(T,Q) resultT;
1419 
1420         // fast return if possible
1421         if (t.zero_rank or (t.ndim()==0)) return TensorTrain<resultT>(t.dims());
1422 
1423         const long ndim=t.ndim();
1424         MADNESS_ASSERT(axis<ndim and axis>=0);
1425         MADNESS_ASSERT(c.ndim()==2);
1426 
1427         TensorTrain<resultT> result=copy(t);
1428 
1429         if (axis==0) {
1430             result.core[0]=inner(c,t.core[0],0,0);
1431         } else if (axis==ndim-1) {
1432             result.core[ndim-1]=inner(t.core[ndim-1],c,1,0);
1433         } else {
1434             Tensor<resultT> tmp=inner(t.core[axis],c,1,0);  // G~(r1,r2,i')
1435             result.core[axis]=copy(tmp.swapdim(1,2));
1436         }
1437         return result;
1438 
1439     }
1440 
1441 
1442     /// apply an operator in TT format on a tensor in TT format
1443 
1444     /// @param[in]  op  operator in TT format ..(r_1,k',k,r_2)..
1445     /// @param[in]  t   tensor in TT format  ..(r_1,k',r_2)..
1446     /// the result tensor will be
1447     /// .. (r_1,k,r_2) = \sum_k' ..(r_1,k',k,r_2)..  ..(r_1,k',r_2)..
1448     /// during the apply a rank reduction will be performed
1449     /// 2*ndim allocates are needed
1450     template <class T, class Q>
apply(const TensorTrain<T> & op,const TensorTrain<Q> & t,const double thresh)1451     TensorTrain<TENSOR_RESULT_TYPE(T,Q)> apply(const TensorTrain<T>& op,
1452             const TensorTrain<Q>& t, const double thresh) {
1453 
1454         typedef TENSOR_RESULT_TYPE(T,Q) resultT;
1455         MADNESS_ASSERT(op.ndim()==t.ndim());
1456 
1457         const long nd=t.ndim();
1458 
1459         // need to add special cases for low dimensions
1460         MADNESS_ASSERT(nd>2);
1461 
1462         std::vector<Tensor<resultT> > B(t.ndim());  // will be the result cores
1463 
1464         // set up scratch tensors
1465         long maxk=0;           // max dimension of the tensor t
1466         long maxr_t=1;         // max rank of the input tensor t
1467         long maxr_op=1;        // max rank of the operator op
1468         for (int i=0; i<nd-1; ++i) {
1469             maxk=std::max(maxk,t.dim(i));
1470             maxr_t=std::max(t.ranks(i),maxr_t);
1471             maxr_op=std::max(op.ranks(i),maxr_op);
1472         }
1473 
1474         long maxr_r=1;           // max rank of the result tensor
1475         for (int i=0, j=nd-1; i<j; ++i, --j) {
1476             maxr_r*=t.dim(i);
1477         }
1478 
1479         long maxn=0, maxm=0;
1480 //        {
1481             long R11=t.dim(0);         // max final ranks
1482             long rR=R11*t.ranks(1);      // R1 r2
1483             long maxR=R11, maxr=t.ranks(1);            //
1484             for (int i=1; i<nd-2; ++i) {
1485                 long k=t.dim(i);
1486                 R11*=k;     // max final ranks
1487                 R11=std::min(R11,maxr_r);
1488                 long r2=t.ranks(i+1);   // initial ranks
1489                 if (rR<R11*r2) {
1490                     maxR=std::max(maxR,R11);
1491                     maxr=std::max(maxr,r2);
1492                     rR=R11*r2;
1493                 }
1494             }
1495             // max matrix dimensions to be svd'ed
1496             maxn=maxR*maxk;
1497             maxm=maxr_op*maxr;
1498 //        }
1499 
1500 
1501         if (maxm*maxn>5e7) {
1502             print("huge scratch spaces!! ",maxn*maxm/1024/1024,"MByte");
1503         }
1504         long lscr=std::max(3*std::min(maxm,maxn)+std::max(maxm,maxn),
1505                 5*std::min(maxm,maxn)) + maxn*maxm;
1506         Tensor<resultT> scr(lscr);   // scratch
1507         Tensor< typename Tensor<T>::scalar_type > s(std::min(maxn,maxm));
1508 
1509         // scratch space for contractions
1510         Tensor<resultT> scr3(2*maxn*maxm);
1511         Tensor<resultT> scr1=scr3(Slice(0,maxn*maxm-1));
1512         Tensor<resultT> scr2=scr3(Slice(maxn*maxm,-1));
1513 
1514 
1515         // contract first core
1516         const long r0=t.ranks(0l);
1517         const long q0=op.get_core(0).dim(2);
1518         const long k0=t.dim(0);
1519         inner_result(op.get_core(0),t.get_core(0),0,0,scr2);
1520         Tensor<resultT> AC=scr2(Slice(0,r0*q0*k0-1)).reshape(k0,r0*q0);
1521 
1522         // SVD on first core, skip small singular values
1523         long R=rank_revealing_decompose(AC,B[0],thresh,s,scr1);
1524         if (R==0) return TensorTrain<resultT>(t.dims());    // fast return for zero ranks
1525         B[0]=B[0].reshape(k0,R);
1526 
1527         // AC has dimensions R1,(q1,r1)
1528         Tensor<resultT> VT=AC.reshape(R,q0,r0);
1529 
1530         // loop over all dimensions 1,..,nd-1
1531         for (int d=1; d<nd; ++d) {
1532 
1533             // alias
1534             Tensor<T> C=t.get_core(d);
1535             Tensor<T> A=op.get_core(d);
1536 
1537             // alias dimensions
1538             long R1=VT.dim(0);        // left rank of the result tensor
1539             long q1=A.dim(0);         // left rank of the operator
1540             long k=C.dim(1);          // true for d>0
1541             long r2=1;                // right rank of the input tensor, true for d=nd-1
1542             long q2=1;                // right rank of the operator, true for d=nd-1
1543             if (d<nd-1) {
1544                 r2=C.dim(2);          // right rank of the input tensor, true for d<nd-1
1545                 q2=A.dim(3);          // right rank of the operator
1546             }
1547 
1548             // contract VT into the next core
1549             Tensor<resultT> VC=scr1(Slice(0,R1*q1*k*r2-1));
1550             VC(Slice(0,R1*q1*k*r2-1))=resultT(0.0);         // zero out result tensor
1551             inner_result(VT,C,-1,0,VC);          // VT(R1,q1,r1) * C(r1,k',r2)
1552 
1553             // contract A into VC (R,q,k,r2)
1554             VC=VC.reshape(R1,q1*k,r2);           // VC(R,(q,k),r2)
1555             A=A.fusedim(0);                      // A((q1,k),k,q2);
1556             Tensor<resultT> AVC=scr2(Slice(0,R1*q2*k*r2-1));
1557             AVC(Slice(0,R1*q2*k*r2-1))=resultT(0.0);        // zero out result tensor
1558             inner_result(VC,A,1,0,AVC);          // AVC(R1,r2,k,q2)
1559             AVC=AVC.reshape(R1,r2,k,q2);
1560 
1561             // AVC is the final core if we have reached the end of the tensor train
1562             if (d==nd-1) {
1563                 B[d]=copy(AVC.reshape(R1,k));
1564                 break;
1565             }
1566 
1567             // SVD on current core
1568             AVC=copy(AVC.cycledim(2,1,3));       // AVC(R,k,q2,r2); deep copy necessary
1569 
1570             MADNESS_ASSERT(AVC.dim(0)==R1);
1571             MADNESS_ASSERT(AVC.dim(1)==k);
1572             MADNESS_ASSERT(AVC.dim(2)==q2);
1573 
1574             AVC=AVC.reshape(R1*k,q2*r2);
1575             long R2=rank_revealing_decompose(AVC,B[d],thresh,s,scr1);
1576             if (R2==0) return TensorTrain<resultT>(t.dims());    // fast return for zero ranks
1577             B[d]=B[d].reshape(R1,k,R2);
1578             VT=AVC.reshape(R2,q2,r2);
1579         }
1580 
1581         TensorTrain<T> result(B);
1582         return result;
1583 
1584     }
1585 
1586 
1587     /// compute the n-D identity operator with k elements per dimension
1588     template<typename T>
tt_identity(const long ndim,const long k)1589     TensorTrain<T> tt_identity(const long ndim, const long k) {
1590         Tensor<T> id(k,k);
1591         for (int i=0; i<k; ++i) id(i,i)=1.0;
1592         id=id.reshape(1,k,k,1);
1593         std::vector<Tensor<T> > cores(ndim,id);
1594         TensorTrain<T> result(cores);
1595         if (ndim>1) {
1596             result.get_core(0)=result.get_core(0).reshape(k,k,1);
1597             result.get_core(ndim-1)=result.get_core(ndim-1).reshape(1,k,k);
1598         } else {
1599             result.get_core(0)=result.get_core(0).reshape(k,k);
1600         }
1601         return result;
1602     }
1603 
1604 
1605     /// computes the outer product of two tensors
1606 
1607     /// result(i,j,...,p,q,...) = left(i,k,...)*right(p,q,...)
1608     /// @result Returns a new tensor train
1609     template <class T, class Q>
outer(const TensorTrain<T> & t1,const TensorTrain<Q> & t2)1610     TensorTrain<TENSOR_RESULT_TYPE(T,Q)> outer(const TensorTrain<T>& t1,
1611             const TensorTrain<Q>& t2) {
1612 
1613         typedef TENSOR_RESULT_TYPE(T,Q) resultT;
1614 
1615 
1616         // fast return if possible
1617         if (t1.zero_rank or t2.zero_rank) {
1618             // compute new dimensions
1619             std::vector<long> dims(t1.ndim()+t2.ndim());
1620             for (int i=0; i<t1.ndim(); ++i) dims[i]=t1.dim(i);
1621             for (int i=0; i<t2.ndim(); ++i) dims[t1.ndim()+i]=t2.dim(i);
1622 
1623             return TensorTrain<resultT>(dims);
1624         }
1625 
1626         TensorTrain<resultT> result;
1627         for (int i=0; i<t1.ndim(); ++i) result.core.push_back(copy(t1.core[i]));
1628         for (int i=0; i<t2.ndim(); ++i) result.core.push_back(copy(t2.core[i]));
1629 
1630         // reshape the new interior cores
1631         long core_dim=t1.core.back().ndim();       // 2 for tensors, 3 for operators
1632         long k1=t1.core.back().dim(core_dim-1);    // (r,k) for tensors, (r,k',k) for operators
1633         long k2=t2.core.front().dim(0);
1634         result.core[t1.ndim()-1]=result.core[t1.ndim()-1].splitdim(core_dim-1,k1,1);
1635         result.core[t1.ndim()]=result.core[t1.ndim()].splitdim(0,1,k2);
1636         result.zero_rank=false;
1637 
1638         return result;
1639 
1640     }
1641 
1642 }
1643 
1644 #endif /* TENSORTRAIN_H_ */
1645