1 /*
2   This file is part of MADNESS.
3 
4   Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10 
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   GNU General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20   For more information please contact:
21 
22   Robert J. Harrison
23   Oak Ridge National Laboratory
24   One Bethel Valley Road
25   P.O. Box 2008, MS-6367
26 
27   email: harrisonrj@ornl.gov
28   tel:   865-241-3937
29   fax:   865-572-0680
30 
31   $Id$
32 */
33 
34 /// \file SRConf.h
35 /// \brief handles the low-level details of a separated representation tensor
36 
37 #ifndef SRCONF_H_
38 #define SRCONF_H_
39 
40 //#define BENCH 0
41 
42 #include <madness/world/print.h>
43 #include <madness/tensor/tensor.h>
44 #include <madness/tensor/clapack.h>
45 #include <madness/tensor/tensor_lapack.h>
46 #include <list>
47 
48 namespace madness {
49 
50 
51 
52 #ifdef BENCH
53 	static double time_[30];
54 #endif
55 
56 
57 	template <class T> class GenTensor;
58     template <class T> class LowRankTensor;
59     template <class T> class SliceLowRankTensor;
60 
61 	/**
62 	 * A SRConf handles all the configurations in a Separated Representation.
63 	 */
64 
65 	template <typename T>
66 	class SRConf {
67 		friend class GenTensor<T>;
68 		friend class LowRankTensor<T>;
69         friend class SliceLowRankTensor<T>;
70 
71 		/// return the number of vectors (i.e. dim_eff) according to the TensorType
compute_nvec(const TensorType & tt)72 		static unsigned int compute_nvec(const TensorType& tt) {
73 			if (tt==TT_FULL) return 1;
74 			if (tt==TT_2D) return 2;
75 			print("unknown TensorType",tt);
76 			MADNESS_ASSERT(0);
77 		}
78 
79 		/// the scalar type of T
80 		typedef typename Tensor<T>::scalar_type scalar_type;
81 	public:
82 
83 #ifdef BENCH
time(int i)84 		static double& time(int i) {return time_[i];}
85 #endif
86 
87 		typedef Tensor<T> tensorT;
88 
89 		/// check orthonormality at low rank additions
90 		static const bool check_orthonormality=false;
91 
92 		/// the number of dimensions (the order of the tensor)
93 		unsigned int dim_;
94 
95 		/// for each configuration the weight; length should be r
96 		Tensor< typename Tensor<T>::scalar_type >  weights_;
97 
98 		/// for each (physical) dimension one Tensor of (logical) dimension (r,k)
99 		/// for vectors or (r,kprime,k) for operators
100 		std::vector<tensorT> vector_;
101 
102 		/// what is the rank of this
103 		long rank_;
104 
105 		/// the number of underlying basis functions
106 		/// the dimensions of vector_ will be
107 		/// vector_(rank,maxk),
108 		/// vector_(rank,maxk,maxk), etc
109 		unsigned int maxk_;
110 
111 		/// Slice containing the actual data in each vector, ignoring "empty" configurations;
112 		/// will maintain contiguity of the data.
113 		std::vector<Slice> s_;
114 
115 		/// how will this be represented
116 		TensorType tensortype_;
117 
118 
119 	public:
120 
121     	/// return the index of the last singular vector/value to meet the threshold
122 		///        (returns -1 if all meet threshold, i.e. || A ||_2 < threshold)
123     	/// given a matrix A in SVD form, truncate the singular values such that the
124     	/// accuracy threshold is still met.
125     	/// @param[in]	thresh	the threshold eps: || A - A(truncated) || < eps
126     	/// @param[in] 	rank	the number of singular values in w
127     	/// @param[in]	w		the weights/singular values of A
128     	/// @return		i		the index of s_max to contribute: w(Slice(0,i)); i.e. inclusive!
max_sigma(const double & thresh,const int & rank,const Tensor<double> & w)129     	static int max_sigma(const double& thresh, const int& rank, const Tensor<double>& w) {
130 
131     	    if (thresh<0.0) return rank-1;
132     		// find the maximal singular value that's supposed to contribute
133     		// singular values are ordered (largest first)
134     		double residual=0.0;
135     		long i;
136     		for (i=rank-1; i>=0; i--) {
137     			residual+=w(i)*w(i);
138     			if (residual>thresh*thresh) break;
139     		}
140     		return i;
141     	}
142 
143 
144 		/// default ctor
SRConf()145 		SRConf() : dim_(0), rank_(0), maxk_(0), s_(), tensortype_(TT_NONE) {
146 		};
147 
148 		/// ctor with dimensions for a vector configuration (tested)
SRConf(const unsigned int & dim,const unsigned int & k,const TensorType & tt)149 		SRConf(const unsigned int& dim, const unsigned int& k, const TensorType& tt)
150 			: dim_(dim)
151 			, rank_(0)
152 			, maxk_(k)
153 			, s_()
154 			, tensortype_(tt) {
155 
156 			// make sure dim is integer multiple of requested TT
157 			const long nvec=compute_nvec(tt);
158 			MADNESS_ASSERT(dim%nvec==0);
159 
160 			// construct empty vector
161 			weights_=Tensor<double>(int(0));
162 			vector_=std::vector<Tensor<T> > (nvec);
163 
164 			if (tt==TT_FULL) {
165 				vector_[0]=tensorT(std::vector<long>(dim,k));
166 				rank_=-1;
167 			} else {
168 				for (unsigned int idim=0; idim<nvec; idim++) vector_[idim]=Tensor<T>(0,kVec());
169 			}
170 			make_structure();
171 			MADNESS_ASSERT(has_structure());
172 		}
173 
174 		/// copy ctor (tested); shallow copy
SRConf(const SRConf & rhs)175 		SRConf(const SRConf& rhs)  {
176 			*this=rhs;
177             MADNESS_ASSERT(has_structure());
178 		}
179 
180 		/// ctor with provided weights and effective vectors; shallow copy
SRConf(const Tensor<double> & weights,const std::vector<Tensor<T>> & vectors,const unsigned int & dim,const unsigned int maxk,const TensorType & tt)181 		SRConf(const Tensor<double>& weights, const std::vector<Tensor<T> >& vectors,
182 				const unsigned int& dim, const unsigned int maxk, const TensorType& tt)
183 			: dim_(dim)
184 			, maxk_(maxk)
185 			, tensortype_(tt) {
186 
187 			// consistency check
188 			MADNESS_ASSERT(vectors.size()>0);
189 			MADNESS_ASSERT(weights.ndim()==1 and weights.dim(0)==vectors[0].dim(0));
190 
191 			// compute dimension
192 			unsigned int nvec=compute_nvec(tt);
193 			MADNESS_ASSERT(vectors.size()==nvec);
194 			MADNESS_ASSERT(dim%nvec==0);
195 
196 			rank_=weights.dim(0);
197 			weights_=weights;
198 			vector_=std::vector<Tensor<T> > (long(vectors.size()));
199 			for (unsigned int idim=0; idim<vectors.size(); idim++) {
200 				vector_[idim]=vectors[idim];
201 			}
202 			make_slices();
203             MADNESS_ASSERT(has_structure());
204 		}
205 
206 		/// explicit ctor with one vector (aka full representation), shallow
SRConf(const tensorT & vector1)207 		SRConf(const tensorT& vector1)
208 			: dim_(vector1.ndim())
209 			, weights_(Tensor<double>())
210 			, rank_(-1)
211 			, maxk_(vector1.dim(0))
212 			, tensortype_(TT_FULL) {
213 
214 			vector_.resize(1);
215 			vector_[0]=vector1;
216 			MADNESS_ASSERT(has_structure());
217 		}
218 
219 		/// explicit ctor with two vectors (aka SVD), shallow
SRConf(const Tensor<double> & weights,const tensorT & vector1,const tensorT & vector2,const unsigned int & dim,const unsigned int maxk)220 		SRConf(const Tensor<double>& weights, const tensorT& vector1, const tensorT& vector2,
221 				const unsigned int& dim, const unsigned int maxk)
222 			: dim_(dim)
223 			, maxk_(maxk)
224 			, tensortype_(TT_2D) {
225 
226 			MADNESS_ASSERT(weights.ndim()==1);
227 			MADNESS_ASSERT(vector1.ndim()==2);
228 			MADNESS_ASSERT(vector2.ndim()==2);
229 			MADNESS_ASSERT(weights.dim(0)==vector1.dim(0));
230 			MADNESS_ASSERT(vector2.dim(0)==vector1.dim(0));
231 			vector_.resize(2);
232 			vector_[0]=vector1;
233 			vector_[1]=vector2;
234 			weights_=weights;
235 			rank_=weights.dim(0);
236 			make_structure();
237 			make_slices();
238             MADNESS_ASSERT(has_structure());
239 		}
240 
241 		/// assignment operator (tested), shallow copy of vectors
242 		SRConf& operator=(const SRConf& rhs)  {
243 
244 			// check for self-assignment
245 			if (&rhs==this) return *this;
246 
247 			// these always hold
248 			dim_=rhs.dim_;
249 			tensortype_=rhs.tensortype_;
250 			maxk_=rhs.maxk_;
251 			s_=rhs.s_;
252 
253 			if (rhs.has_no_data()) {
254 				// construct empty vector
255 				weights_=Tensor<double>(0);
256 				vector_=std::vector<Tensor<T> > (rhs.dim_eff());
257 				rank_=0;
258 				for (unsigned int idim=0; idim<dim_eff(); idim++) vector_[idim]=Tensor<T>(0,long(this->kVec()));
259 				make_structure();
260 
261 
262 			} else if (rhs.type()==TT_FULL) {
263 				weights_=Tensor<double>();
264 				rank_=-1;
265 				vector_.resize(1);
266 				vector_[0]=rhs.ref_vector(0);
267 
268 			} else {
269 				// assign vectors; shallow copy
270 				vector_.resize(rhs.vector_.size());
271 				for (unsigned int i=0; i<rhs.vector_.size(); i++) {
272 					vector_[i]=rhs.vector_[i];
273 				}
274 
275 				// shallow copy
276 				weights_=(rhs.weights_);
277 				rank_=rhs.rank();
278 
279 				// consistency check
280 				for (unsigned int idim=0; idim<dim_eff(); idim++) {
281 					MADNESS_ASSERT(weights_.dim(0)==vector_[idim].dim(0));
282 				}
283 			}
284             MADNESS_ASSERT(has_structure());
285 			return *this;
286 		}
287 
288 		/// assign a number to this;
289 		SRConf& operator=(const T& number) {
290 
291 		    if (type()==TT_2D) {
292                 // rank will be one
293                 rank_=1;
294                 vector_[0]=Tensor<T>(1,kVec());
295                 vector_[1]=Tensor<T>(1,kVec());
296                 vector_[0]=number;
297                 vector_[1]=1.0;
298                 weights_=Tensor< typename Tensor<T>::scalar_type > (1);
299                 weights_(0l)=1.0;
300                 make_structure();
301                 normalize();
302 		    } else if (type()==TT_FULL) {
303 		        vector_[0]=number;
304 
305 		    }
306 		    return *this;
307 		}
308 
309 		/// return some of the terms of the SRConf (start,..,end), inclusively
310 		/// shallow copy
get_configs(const int & start,const int & end)311 		const SRConf get_configs(const int& start, const int& end) const {
312 
313 			MADNESS_ASSERT((start>=0) and (end<=rank()));
314 			MADNESS_ASSERT(s_.size()>1);
315 			const long nvec=dim_eff();
316 			const long dim_pv_eff=s_.size()-1;	// #dim per vector minus rank-dim
317 
318 			Slice s(start,end);
319 			std::vector<tensorT> v(nvec);
320 
321 			// slice vectors
322 			if (dim_pv_eff==1) {
323 				for (long i=0; i<nvec; i++) v[i]=ref_vector(i)(s,_);
324 			} else if (dim_pv_eff==2) {
325 				for (long i=0; i<nvec; i++) v[i]=ref_vector(i)(s,_,_);
326 			} else if (dim_pv_eff==3) {
327 				for (long i=0; i<nvec; i++) v[i]=ref_vector(i)(s,_,_,_);
328 			} else {
329 				MADNESS_EXCEPTION("faulty dim_pv in SRConf::get_configs",0);
330 			}
331 
332 			SRConf<T> result(weights_(s),v,dim(),get_k(),type());
333             MADNESS_ASSERT(result.has_structure());
334 			return result;
335 		}
336 
337 		/// dtor
~SRConf()338 		~SRConf() {}
339 
340         template <typename Archive>
serialize(Archive & ar)341         void serialize(Archive& ar) {
342               	int i=int(tensortype_);
343               	ar & dim_ & weights_ & vector_ & rank_ & maxk_ & i;
344               	tensortype_=TensorType(i);
345               	make_slices();
346                 MADNESS_ASSERT(has_structure());
347         }
348 
349 		/// return the tensor type
type()350 		TensorType type() const {return tensortype_;};
351 
352 		/// does this have any data?
has_data()353 		bool has_data() const {
354 			if (tensortype_==TT_FULL) return (vector_.size()>0 and vector_[0].has_data());
355 			return rank()>0;
356 		}
357 
358 	private:
359 
360 		/// does this have any data?
has_no_data()361 		bool has_no_data() const {return !has_data();}
362 
363 		/// reserve enough space to hold at least r configurations
reserve(long r)364 		void reserve(long r) {
365 
366 			// this should at least hold the current information
367 			MADNESS_ASSERT(r>=this->rank());
368 			MADNESS_ASSERT(has_data() or vector_.size()>0);
369 
370 			// fast return if possible
371 			// nothing to be done
372 			if (r==0) return;
373 			// already large enuff?
374 			if (this->vector_[0].dim(0)>=r) return;
375 
376 			// to avoid incremental increase of the rank
377 			r+=3;
378 
379 			// for convenience
380 			const long rank=this->rank();
381 			const long kvec=this->kVec();
382 			const bool had_structure=this->has_structure();
383 			if (had_structure) this->undo_structure();
384 
385 			// transfer weights
386 			Tensor<scalar_type> newWeights(r);
387 			if (rank>0) newWeights(Slice(0,rank-1))=weights_(Slice(0,rank-1));
388 			std::swap(weights_,newWeights);
389 
390 			// transfer vectors
391 			for (unsigned int idim=0; idim<this->dim_eff(); idim++) {
392 
393 				tensorT newVector(r,kvec);
394 				if (rank>0) newVector(this->c0())=vector_[idim](this->c0());
395 				std::swap(vector_[idim],newVector);
396 
397 			}
398 			MADNESS_ASSERT(weights_.dim(0)==vector_[0].dim(0));
399 			if (had_structure) this->make_structure(true);
400             MADNESS_ASSERT(has_structure());
401 
402 		}
403 
404 		/// return a Slice that corresponds the that part of vector_ that holds coefficients
c0()405 		const std::vector<Slice>& c0() const {
406 			MADNESS_ASSERT(s_.size()>0);
407 			return s_;
408 		}
409 
410 		/// reduce the rank using a divide-and-conquer approach
divide_and_conquer_reduce(const double & thresh)411 		void divide_and_conquer_reduce(const double& thresh) {
412 
413 			if (has_no_data()) return;
414 			if (rank()==1) {
415 				normalize();
416 				return;
417 			}
418 
419 			// divide the SRConf into two
420 			const long chunksize=8;
421 			if (rank()>chunksize) {
422         		SRConf<T> chunk1=this->get_configs(0,rank()/2);
423         		SRConf<T> chunk2=this->get_configs(rank()/2+1,rank()-1);
424         		chunk1.divide_and_conquer_reduce(thresh*0.5);
425         		chunk2.divide_and_conquer_reduce(thresh*0.5);
426 
427         		// collect the two SRConfs
428         		*this=chunk1;
429         		this->add_SVD(chunk2,thresh);
430 
431 			} else {
432 
433 				// and reduce the rank
434 				this->orthonormalize(thresh);
435 			}
436             MADNESS_ASSERT(has_structure());
437 		}
438 
439 	public:
440 		/// orthonormalize this
orthonormalize(const double & thresh)441 		void orthonormalize(const double& thresh) {
442 
443 			if (type()==TT_FULL) return;
444 			if (has_no_data()) return;
445 			if (rank()==1) {
446 				normalize();
447 				return;
448 			}
449 #ifdef BENCH
450 			double cpu0=wall_time();
451 #endif
452 //			vector_[0]=copy(vector_[0](c0()));
453 //			vector_[1]=copy(vector_[1](c0()));
454 //			weights_=weights_(Slice(0,rank()-1));
455             normalize();
456 #ifdef BENCH
457 			double cpu1=wall_time();
458 #endif
459 //            this->undo_structure();
460 			weights_=weights_(Slice(0,rank()-1));
461 //			ortho3(vector_[0],vector_[1],weights_,thresh);
462             tensorT v0=flat_vector(0);
463             tensorT v1=flat_vector(1);
464 #ifdef BENCH
465 			double cpu2=wall_time();
466 #endif
467 			ortho3(v0,v1,weights_,thresh);
468             std::swap(vector_[0],v0);
469             std::swap(vector_[1],v1);
470 #ifdef BENCH
471 			double cpu3=wall_time();
472 #endif
473 			rank_=weights_.size();
474 			MADNESS_ASSERT(rank_>=0);
475 			this->make_structure();
476 			make_slices();
477             MADNESS_ASSERT(has_structure());
478 #ifdef BENCH
479 			double cpu4=wall_time();
480 			SRConf<T>::time(21)+=cpu1-cpu0;
481 			SRConf<T>::time(22)+=cpu2-cpu1;
482 			SRConf<T>::time(23)+=cpu3-cpu2;
483 			SRConf<T>::time(24)+=cpu4-cpu3;
484 			SRConf<T>::time(20)+=cpu4-cpu0;
485 #endif
486 
487 		}
488 
489 	private:
490 		/// append configurations of rhs to this
491 
492 		/// simplified version of inplace_add for flattened configurations
493 		/// *this += fac*rhs
494 		void append(const SRConf<T>& rhs, const double fac=1.0) {
495 
496 			// fast return if possible
497 			if (rhs.has_no_data()) return;
498 			if (this->has_no_data()) {
499 				*this=copy(rhs);
500 				this->scale(fac);
501 				return;
502 			}
503 
504 			const long newRank=this->rank()+rhs.rank();
505 			const long lhsRank=this->rank();
506 			const long rhsRank=rhs.rank();
507 			reserve(newRank);
508 
509 			// assign weights
510 			this->weights_(Slice(lhsRank,newRank-1))=rhs.weights_(Slice(0,rhsRank-1))*fac;
511 			std::vector<Slice> s(dim_per_vector()+1,_);
512 			s[0]=Slice(lhsRank,newRank-1);
513 
514 			// assign vectors
515 			for (unsigned int idim=0; idim<this->dim_eff(); idim++) {
516 //              vector_[idim](Slice(lhsRank,newRank-1),_)=rhs.vector_[idim](rhs.c0());
517 				vector_[idim](s)=rhs.vector_[idim](rhs.c0());
518 			}
519 
520 			rank_=newRank;
521 			make_slices();
522             MADNESS_ASSERT(has_structure());
523 
524 		}
525 		void append(const SRConf<T>& rhs, const double_complex fac=1.0) {
526 			MADNESS_EXCEPTION("no complex in SRConf",1);
527 		}
528 
529 		/// add two orthonormal configurations, yielding an optimal SVD decomposition
add_SVD(const SRConf<T> & rhs,const double & thresh)530 		void add_SVD(const SRConf<T>& rhs, const double& thresh) {
531 #ifdef BENCH
532 			double cpu0=wall_time();
533 #endif
534 			if (rhs.has_no_data()) return;
535 			if (has_no_data()) {
536 				*this=rhs;
537 				return;
538 			}
539 
540 			if (check_orthonormality) check_right_orthonormality();
541             if (check_orthonormality) rhs.check_right_orthonormality();
542 
543             this->undo_structure();
544             ortho5(ref_vector(0),ref_vector(1),weights_,
545 					rhs.flat_vector(0),rhs.flat_vector(1),rhs.weights_,thresh);
546 			rank_=weights_.size();
547 			make_structure();
548 			make_slices();
549             MADNESS_ASSERT(has_structure());
550 #ifdef BENCH
551 			double cpu1=wall_time();
552 			time(25)+=cpu1-cpu0;
553 #endif
554 		}
555 
556 	protected:
557 		/// alpha * this(lhs_s) + beta * rhs(rhs_s)
558 
559 		/// bounds checking should have been performed by caller
560 		/// s denotes where in lhs the new contribution from rhs will be inserted
inplace_add(const SRConf<T> & rhs2,std::vector<Slice> lhs_s,std::vector<Slice> rhs_s,const double alpha,const double beta)561 		void inplace_add(const SRConf<T>& rhs2, std::vector<Slice> lhs_s,
562 				std::vector<Slice> rhs_s, const double alpha, const double beta) {
563 
564 			// fast return if possible; no fast return for this.rank()==0
565 			// since we might work with slices!
566 			if (rhs2.has_no_data()) return;
567 
568 			// fast return for full rank tensors
569 			if (type()==TT_FULL) {
570 				vector_[0](lhs_s)+=rhs2.vector_[0](rhs_s);
571 				return;
572 			}
573 
574 			// unflatten this and rhs; shallow wrt vector_
575 			SRConf<T>& lhs=*this;
576 			const SRConf<T>& rhs=rhs2;
577 			if (lhs.has_no_data()) lhs.make_structure(true);
578 			MADNESS_ASSERT(lhs.has_structure() or (lhs.has_no_data()));
579 			MADNESS_ASSERT(rhs.has_structure());
580 
581 			// conflicts with lhs_s ??
582 			MADNESS_ASSERT(alpha==1.0);
583 
584 			// for convenience
585 			const long lhsRank=lhs.rank();
586 			const long rhsRank=rhs.rank();
587 			const long newRank=lhs.rank()+rhs.rank();
588 
589 			const long rhs_k=rhs.get_k();
590 			const long lhs_k=lhs.get_k();
591 
592 			const long dim_pv=lhs.dim_per_vector();
593 
594 			// adapt slices for use
595 			for (unsigned int idim=0; idim<lhs.dim(); idim++) {
596 				if (lhs_s[idim].end<0) lhs_s[idim].end+=lhs_k;
597 				if (rhs_s[idim].end<0) rhs_s[idim].end+=rhs_k;
598 				// make sure slices conform
599 				MADNESS_ASSERT((lhs_s[idim].end-lhs_s[idim].start) == (rhs_s[idim].end-rhs_s[idim].start));
600 				// make sure lhs can actually hold rhs(s)
601 				MADNESS_ASSERT(lhs_k>=(rhs_s[idim].end-rhs_s[idim].start+1));
602 			}
603 
604 			lhs.reserve(newRank);
605 
606 			// assign weights, and include factors alpha and beta
607 			if (alpha!=1.0) lhs.scale(alpha);
608 			lhs.weights_(Slice(lhsRank,newRank-1))=rhs.weights_(Slice(0,rhsRank-1))*beta;
609 
610 
611 			// assign vectors
612 			for (unsigned int idim=0; idim<lhs.dim_eff(); idim++) {
613 
614 				// insert rhs at the right place
615 				if (dim_pv==1) {
616 					lhs.ref_vector(idim)(Slice(lhsRank,newRank-1),lhs_s[idim])=
617 							rhs.ref_vector(idim)(Slice(0,rhsRank-1),rhs_s[idim]);
618 
619 				} else if (dim_pv==2) {
620 					lhs.ref_vector(idim)(Slice(lhsRank,newRank-1),lhs_s[2*idim],lhs_s[2*idim+1])=
621 							rhs.ref_vector(idim)(Slice(0,rhsRank-1),rhs_s[2*idim],rhs_s[2*idim+1]);
622 
623 				} else if (dim_pv==3) {
624 					lhs.ref_vector(idim)(Slice(lhsRank,newRank-1),lhs_s[3*idim],lhs_s[3*idim+1],lhs_s[3*idim+2])=
625 							rhs.ref_vector(idim)(Slice(0,rhsRank-1),rhs_s[3*idim],rhs_s[3*idim+1],rhs_s[3*idim+2]);
626 
627 				} else {
628 					MADNESS_EXCEPTION("extend dim_pv in srconf::inplace_add",0);
629 				}
630 			}
631 
632 			lhs.rank_=newRank;
633 			lhs.make_slices();
634             MADNESS_ASSERT(has_structure());
635 		}
636 
637 		/// deep copy of rhs, shrink
copy(const SRConf<T> & rhs)638 		friend SRConf<T> copy(const SRConf<T>& rhs) {
639 
640 			// if rhs is non-existent simply construct a new SRConf
641 			if (rhs.has_no_data()) return SRConf<T>(rhs.dim(),rhs.get_k(),rhs.type());
642 
643 			if (rhs.type()==TT_FULL) return SRConf<T>(copy(rhs.ref_vector(0)));
644 
645 			// pass a copy of the weights and vectors of rhs to ctor
646 			std::vector<tensorT> vector(rhs.dim_eff());
647 			for (unsigned int idim=0; idim<rhs.dim_eff(); idim++)
648 				vector[idim]=copy(rhs.ref_vector(idim)(rhs.c0()));
649 
650 			return SRConf<T>(copy(rhs.weights_(Slice(0,rhs.rank()-1))),vector,rhs.dim(),rhs.get_k(),rhs.type());
651 		}
652 
653 public:
654         /// return a slice of this (deep copy)
copy_slice(const std::vector<Slice> & s)655         SRConf<T> copy_slice(const std::vector<Slice>& s) const {
656 
657             // fast return if possible
658             if (this->has_no_data()) {
659                 int k_new=s[0].end-s[0].start+1;
660                 return SRConf<T>(dim(),k_new,this->type());
661             }
662 
663             // consistency check
664             MADNESS_ASSERT(s.size()==this->dim());
665             MADNESS_ASSERT(s[0].step==1);
666 
667             // fast return for full rank tensors
668             if (type()==TT_FULL) {
669                 tensorT a=copy(ref_vector(0)(s));
670                 return SRConf<T>(a);
671             }
672 
673 
674             MADNESS_ASSERT(has_structure());
675 //          _ptr->make_structure();
676 
677             // get dimensions
678             const TensorType tt=this->type();
679             const int merged_dim=this->dim_per_vector();
680             const int dim_eff=this->dim_eff();
681             const int rank=this->rank();
682             int k_new=s[0].end-s[0].start+1;
683             if (s[0].end<0) k_new+=this->get_k();
684 
685             // get and reshape the vectors, slice and re-reshape again;
686             // this is shallow
687             const SRConf<T>& sr=*this;
688 
689             std::vector<Tensor<T> > vectors(dim_eff,Tensor<T>());
690 
691             for (int idim=0; idim<dim_eff; idim++) {
692 
693                 // assignment from/to slice is deep-copy
694                 if (merged_dim==1) {
695                     if (rank>0) {
696                         vectors[idim]=copy(sr.ref_vector(idim)(Slice(0,rank-1),s[idim]));
697                     } else {
698                         vectors[idim]=Tensor<T>(0,s[idim].end-s[idim].start+1);
699                     }
700                 } else if (merged_dim==2) {
701                     if (rank>0) {
702                         vectors[idim]=copy(sr.ref_vector(idim)(Slice(0,rank-1),s[2*idim],s[2*idim+1]));
703                     } else {
704                         vectors[idim]=tensorT(0,s[2*idim].end-s[2*idim].start+1,
705                                                 s[2*idim+1].end-s[2*idim+1].start+1);
706                     }
707                 } else if (merged_dim==3) {
708                     if (rank>0) {
709                         vectors[idim]=copy(sr.ref_vector(idim)(Slice(0,rank-1),
710                                 s[3*idim],s[3*idim+1],s[3*idim+2]));
711                     } else {
712                         vectors[idim]=tensorT(0,s[3*idim].end-s[3*idim].start+1,
713                                 s[3*idim+1].end-s[3*idim+1].start+1,
714                                 s[3*idim+2].end-s[3*idim+2].start+1);
715 
716                     }
717                 } else MADNESS_EXCEPTION("unknown number of dimensions in GenTensor::copy_slice()",0);
718             }
719 
720             // work-around for rank==0
721             Tensor<double> weights;
722             if (rank>0) {
723                 weights=copy(this->weights_(Slice(0,rank-1)));
724             } else {
725                 weights=Tensor<double>(int(0));
726             }
727             return SRConf<T>(weights,vectors,this->dim(),k_new,tt);
728         }
729 
730         /// perform elementwise Hadamard product
emul(const SRConf<T> & other)731         SRConf<T>& emul(const SRConf<T>& other) {
732             // consistency check
733             MADNESS_ASSERT(this->dim()==other.dim());
734             MADNESS_ASSERT(this->get_k()==other.get_k());
735 
736             long finalrank=this->rank()*other.rank();
737             SRConf<T> result(dim(),get_k(),TT_2D);
738             if ((this->rank()==0) or (other.rank()==0)) {
739                 ;   // pass
740             } else {
741 
742                 result.vector_[0]=Tensor<T>(finalrank,kVec());
743                 result.vector_[1]=Tensor<T>(finalrank,kVec());
744                 result.weights_=outer(weights_,other.weights_).flat();
745                 result.rank_=finalrank;
746 
747                 for (int k=0; k<kVec(); ++k) {
748                     Tensor<T> a1=flat_vector(0)(_,Slice(k,k));   // (1,k)->(k)
749                     Tensor<T> a2=flat_vector(1)(_,Slice(k,k));
750                     Tensor<T> b1=other.flat_vector(0)(_,Slice(k,k));
751                     Tensor<T> b2=other.flat_vector(1)(_,Slice(k,k));
752 
753                     result.vector_[0](_,Slice(k,k))=outer(a1,b1).reshape(finalrank,1);
754                     result.vector_[1](_,Slice(k,k))=outer(a2,b2).reshape(finalrank,1);
755                 }
756             }
757             result.make_structure();
758             result.normalize();
759 
760             *this=result;
761             return *this;
762         }
763 
764 protected:
765 		/// redo the Slices for getting direct access to the configurations
make_slices()766 		void make_slices() {
767 			if (type()==TT_FULL) return;
768 			if (this->has_no_data()) {
769 				s_.clear();
770 			} else {
771 				// first dim is the rank
772 				if (vector_[0].ndim()>TENSOR_MAXDIM) {
773 					print(*this);
774 					MADNESS_EXCEPTION("serializing failed",0);
775 				}
776 				s_.resize(vector_[0].ndim());
777 				s_[0]=Slice(0,this->rank()-1);
778 				for (int i=1; i<vector_[0].ndim(); i++) {
779 					s_[i] = Slice(_);
780 				}
781 			}
782 		}
783 
784 
785 		void make_structure(bool force=false) {
786 
787 			// fast return if rank is zero
788 			if ((not force) and this->has_no_data()) return;
789 			if (type()==TT_FULL) return;
790 
791 			const int dim_pv=this->dim_per_vector();
792 			MADNESS_ASSERT(dim_pv>0 and dim_pv<=3);
793 			int rr=weights_.dim(0);	// not the rank!
794 			if (weights_.size()==0) rr=0;
795 			const int k=this->get_k();
796 
797 			// reshape the vectors and adapt the Slices
798 			for (unsigned int idim=0; idim<this->dim_eff(); idim++) {
799 				if (dim_pv==2) this->vector_[idim]=vector_[idim].reshape(rr,k,k);
800 				if (dim_pv==3) this->vector_[idim]=vector_[idim].reshape(rr,k,k,k);
801 			}
802 
803 			this->make_slices();
804 
805 		}
806 
807 		void undo_structure(bool force=false) {
808 
809 			// fast return if rank is zero
810 			if ((not force) and this->has_no_data()) return;
811 			if (type()==TT_FULL) return;
812 
813 			const int dim_pv=this->dim_per_vector();
814 			MADNESS_ASSERT(dim_pv>0 and dim_pv<=3);
815 			int rr=weights_.dim(0);	// not the rank!
816 			if (weights_.size()==0) rr=0;
817 			const int kvec=this->kVec();
818 
819 			for (unsigned int idim=0; idim<this->dim_eff(); idim++) {
820 				this->vector_[idim]=this->vector_[idim].reshape(rr,kvec);
821 			}
822 
823 			this->make_slices();
824 		}
825 
826 	public:
827 		/// return reference to one of the vectors F
ref_vector(const unsigned int & idim)828 		Tensor<T>& ref_vector(const unsigned int& idim) {
829 			return vector_[idim];
830 		}
831 
832 		/// return reference to one of the vectors F
ref_vector(const unsigned int & idim)833 		const Tensor<T>& ref_vector(const unsigned int& idim) const {
834 			return vector_[idim];
835 		}
836 
837 	private:
838 		/// return shallow copy of a slice of one of the vectors, flattened to (r,kVec)
flat_vector(const unsigned int & idim)839 		const Tensor<T> flat_vector(const unsigned int& idim) const {
840 		    MADNESS_ASSERT(rank()>0);
841 		    return vector_[idim](c0()).reshape(rank(),kVec());
842 		}
843 
844 		/// return shallow copy of a slice of one of the vectors, flattened to (r,kVec)
flat_vector(const unsigned int & idim)845 		Tensor<T> flat_vector(const unsigned int& idim) {
846 		    MADNESS_ASSERT(rank()>0);
847 		    return vector_[idim](c0()).reshape(rank(),kVec());
848 		}
849 
850 		/// fill this SRConf with 1 flattened random configurations (tested)
851 		void fillWithRandom(const long& rank=1) {
852 
853 			rank_=rank;
854 
855 			// assign; note that Slice(0,_) is inclusive
856 			weights_=Tensor<double>(rank);
857 			weights_=1.0;
858 
859 			for (unsigned int idim=0; idim<this->dim_eff(); idim++) {
860 				vector_[idim]=Tensor<T>(rank_,this->kVec());
861 				vector_[idim].fillrandom();
862 			}
863 
864 			this->normalize();
865 			for (unsigned int idim=0; idim<this->dim_eff(); idim++) {
866 				vector_[idim].scale(madness::RandomValue<T>()*scalar_type(10.0));
867 			}
868 			weights_(Slice(0,this->rank()-1)).fillrandom().scale(10.0);
869 			make_slices();
870             MADNESS_ASSERT(has_structure());
871 		}
872 
873 		/// normalize the vectors (tested)
normalize()874 		void normalize() {
875 
876 			if (type()==TT_FULL) return;
877 			if (rank()==0) return;
878             MADNESS_ASSERT(has_structure());
879 
880 	        // for convenience
881 	        const unsigned int rank=this->rank();
882 	        std::vector<Slice> s(dim_per_vector()+1,_);
883 //	        std::vector<Slice> s(2,_);
884 
885 	        // we calculate the norm sum_i < F^r_i | F^r_i > for each dimension for each r
886 
887 	        // loop over all configurations
888 	        for (unsigned int r=0; r<rank; r++) {
889 	            s[0]=Slice(r,r);
890 	        	// loop over all dimensions
891 	        	for (unsigned int idim=0; idim<dim_eff(); idim++) {
892 
893 //	        		Tensor<T> config=this->ref_vector(idim)(s);
894 //	        		const double norm=config.normf();
895 //	        		const double fac=norm;
896 //	        		double oofac=1.0/fac;
897 //	        		if (fac<1.e-13) oofac=0.0;
898 //	        		weights_(r)*=fac;
899 //	        		config.scale(oofac);
900 
901 //	        		const double norm=this->ref_vector(idim)(s).normf();
902 	        		const double norm=this->vector_[idim](s).normf();
903 	        		const double fac=norm;
904 	        		double oofac=1.0/fac;
905 	        		if (fac<1.e-13) oofac=0.0;
906 	        		weights_(r)*=fac;
907 //	        		this->ref_vector(idim)(s).scale(oofac);
908 //	        		this->flat_vector(idim)(s).scale(oofac);
909 	        		vector_[idim](s).scale(oofac);
910 
911 	        	}
912 	        }
913             MADNESS_ASSERT(has_structure());
914 		}
915 
916 		/// check if the terms are orthogonal
check_right_orthonormality()917 		bool check_right_orthonormality() const {
918 
919 			// fast return if possible
920 			if (rank()==0) return true;
921 
922 			MADNESS_ASSERT(type()==TT_2D);
923 
924 			const tensorT t1=ref_vector(1)(c0()).reshape(rank(),kVec());
925 			tensorT S=inner(t1,t1,1,1);
926 			for (int i=0; i<S.dim(0); i++) S(i,i)-=1.0;
927 
928 			// error per matrix element
929 			double norm=S.normf();
930 			double small=sqrt(norm*norm/S.size());
931 			return (small<1.e-13);
932 		}
933 
934 		/// return if this has only one additional dimension (apart from rank)
is_flat()935 		bool is_flat() const {
936 			return (vector_[0].ndim()==2);
937 		}
938 	public:
939 		/// return if this has a tensor structure (has not been flattened)
has_structure()940 		bool has_structure() const {
941             return (type()==TT_FULL or has_no_data() or vector_[0].dim(1)==this->get_k());
942 		}
943 
944 	private:
945 		/// return the dimension of this
dim()946 		unsigned int dim() const {return dim_;}
947 
948 		/// return the number of vectors
dim_eff()949 		unsigned int dim_eff() const {return vector_.size();}
950 
951 		/// return the logicalrank
rank()952 		long rank() const {return rank_;};
953 
954 		/// return the number of physical matrix elements per dimension
get_k()955 		unsigned int get_k() const {return maxk_;};
956 
957 		/// return the length of the vector (dim_pv*maxk)
kVec()958 		long kVec() const {
959 			const int dimpv=this->dim_per_vector();
960 			int kv=1;
961 			for (int i=0; i<dimpv; ++i) kv*=this->get_k();
962 //			const int kv1= pow(this->get_k(),this->dim_per_vector());
963 //			MADNESS_ASSERT(kv==kv1);
964 			return kv;
965 		}
966 
967 	public:
968 		/// return the number of physical dimensions
dim_per_vector()969 		int dim_per_vector() const {
970 			const int nvec=vector_.size();
971 			const int dim=this->dim();
972 			MADNESS_ASSERT(dim%nvec==0);
973 			return dim/nvec;
974 		}
975 
976 		/// return the weight
weights(const unsigned int & i)977 		double weights(const unsigned int& i) const {return weights_(i);};
978 
979         /// reconstruct this to return a full tensor
reconstruct()980         Tensor<T> reconstruct() const {
981 
982             if (type()==TT_FULL) return ref_vector(0);
983 
984             /*
985              * reconstruct the tensor first to the configurational dimension,
986              * then to the real dimension
987              */
988 
989             // for convenience
990             const unsigned int conf_dim=this->dim_eff();
991             const unsigned int conf_k=this->kVec();           // possibly k,k*k,..
992             const long rank=this->rank();
993             long d[TENSOR_MAXDIM];
994 
995             // fast return if possible
996             if (rank==0) {
997                 // reshape the tensor to the really required one
998                 const long k=this->get_k();
999                 const long dim=this->dim();
1000                 for (long i=0; i<dim; i++) d[i] = k;
1001 
1002                 return Tensor<T> (dim,d,true);
1003             }
1004 
1005 
1006             // set up result Tensor (in configurational dimensions)
1007             for (long i=0; i<conf_dim; i++) d[i] = conf_k;
1008             tensorT s(conf_dim,d,true);
1009 
1010             // flatten this
1011             SRConf<T> sr=*this;
1012 
1013             // and a scratch Tensor
1014             Tensor<T>  scr(rank);
1015             Tensor<T>  scr1(rank);
1016             Tensor<T>  scr2(rank);
1017 
1018             if (conf_dim==1) {
1019 
1020                 for (unsigned int i0=0; i0<conf_k; i0++) {
1021                     scr=sr.weights_;
1022                     //                  scr.emul(F[0][i0]);
1023                     T buffer=scr.sum();
1024                     s(i0)=buffer;
1025                 }
1026 
1027             } else if (conf_dim==2) {
1028 
1029 
1030                 //              tensorT weight_matrix(rank,rank);
1031                 //              for (unsigned int r=0; r<rank; r++) {
1032                 //                  weight_matrix(r,r)=this->weight(r);
1033                 //              }
1034                 //              s=inner(weight_matrix,sr._ptr->refVector(0));
1035                 //              s=inner(s,sr._ptr->refVector(1),0,0);
1036 //                tensorT sscr=copy(sr._ptr->ref_vector(0)(sr._ptr->c0()));
1037                 tensorT sscr=copy(sr.flat_vector(0));
1038                 for (unsigned int r=0; r<rank; r++) {
1039                     const double w=weights(r);
1040                     for (unsigned int k=0; k<conf_k; k++) {
1041                         sscr(r,k)*=w;
1042                     }
1043                 }
1044                 inner_result(sscr,sr.flat_vector(1),0,0,s);
1045 
1046 
1047             } else {
1048                 print("only config_dim=1,2 in SRConf::reconstruct");
1049                 MADNESS_ASSERT(0);
1050             }
1051 
1052 
1053             // reshape the tensor to the really required one
1054             const long k=this->get_k();
1055             const long dim=this->dim();
1056             for (long i=0; i<dim; i++) d[i] = k;
1057 
1058             Tensor<T> s2=s.reshape(dim,d);
1059             return s2;
1060         }
1061 
1062 
1063 	private:
1064 		/// return the number of coefficients
nCoeff()1065 		unsigned int nCoeff() const {
1066 			if (type()==TT_FULL) return ref_vector(0).size();
1067 			return this->dim_eff()*this->kVec()*this->rank();
1068 		};
1069 
1070 		/// return the real size of this
real_size()1071 		size_t real_size() const {
1072 			size_t n=0;
1073 			for (size_t i=0; i<vector_.size(); ++i) {
1074 				n+=vector_[i].size()*sizeof(T) + sizeof(tensorT);
1075 			}
1076 			n+=weights_.size()*sizeof(double) + sizeof(Tensor<double>);
1077 			n+=sizeof(*this);
1078 			n+=s_.size()*sizeof(Slice);
1079 			return n;
1080 		}
1081 
1082 		/// calculate the Frobenius inner product (tested)
1083 		template<typename Q>
TENSOR_RESULT_TYPE(T,Q)1084 		friend TENSOR_RESULT_TYPE(T,Q) overlap(const SRConf<T>& rhs, const SRConf<Q>& lhs) {
1085 
1086 			// fast return if either rank is 0
1087 			if ((lhs.has_no_data()) or (rhs.has_no_data())) return 0.0;
1088 
1089 			/*
1090 			 * the structure of an SRConf is (r,k) or (r,k',k), with
1091 			 * r the slowest index; the overlap is therefore simply calculated
1092 			 * as the matrix multiplication rhs*lhs^T
1093 			 */
1094 
1095 			// some checks
1096 			MADNESS_ASSERT(rhs.dim()==lhs.dim());
1097 			MADNESS_ASSERT(rhs.dim()>0);
1098 
1099 			typedef TENSOR_RESULT_TYPE(T,Q) resultT;
1100 
1101 			if (rhs.type()==TT_FULL) {
1102 				return rhs.ref_vector(0).trace(lhs.ref_vector(0));
1103 			}
1104 
1105 			const unsigned int dim_eff=rhs.dim_eff();
1106 
1107 			// get the weight matrix
1108 			Tensor<resultT> weightMatrix=outer(lhs.weights_(Slice(0,lhs.rank()-1)),
1109 					rhs.weights_(Slice(0,rhs.rank()-1)));
1110 
1111 			// calculate the overlap matrices for each dimension at a time
1112 			for (unsigned int idim=0; idim<dim_eff; idim++) {
1113 				const Tensor<T> lhs2=lhs.flat_vector(idim);
1114 				const Tensor<Q> rhs2=rhs.flat_vector(idim);
1115 				Tensor<resultT> ovlp(lhs.rank(),rhs.rank());
1116 				inner_result(lhs2,rhs2,-1,-1,ovlp);
1117 
1118 			    // multiply all overlap matrices with the weight matrix
1119 				weightMatrix.emul(ovlp);
1120 			}
1121 
1122 			//	return weightMatrix;
1123 			const TENSOR_RESULT_TYPE(T,Q) overlap=weightMatrix.sum();
1124 			return overlap;
1125 		}
1126 
1127 		/// calculate the Frobenius norm, if this is in SVD form
svd_normf()1128         typename TensorTypeData<T>::float_scalar_type svd_normf() const {
1129             if (has_no_data()) return 0.0;
1130             MADNESS_ASSERT(type()==TT_2D);
1131             return weights_(Slice(0,rank()-1)).normf();
1132         }
1133 
1134 		/// calculate the Frobenius norm
normf()1135 		typename TensorTypeData<T>::float_scalar_type normf() const {
1136 
1137 			// fast return if possible
1138 			if (has_no_data()) return 0.0;
1139 			if (type()==TT_FULL) return ref_vector(0).normf();
1140 
1141 			// some checks
1142 			MADNESS_ASSERT(dim()>0);
1143 			MADNESS_ASSERT(not TensorTypeData<T>::iscomplex);
1144 
1145 			// get the weight matrix
1146 			Tensor<T> weightMatrix=outer(weights_(Slice(0,rank()-1)),weights_(Slice(0,rank()-1)));
1147 
1148 			// calculate the overlap matrices for each dimension at a time
1149 			for (unsigned int idim=0; idim<dim_eff(); idim++) {
1150 				const Tensor<T> vec=flat_vector(idim);
1151 				Tensor<T> ovlp(rank(),rank());
1152 				inner_result(vec,vec,-1,-1,ovlp);
1153 
1154 			    // multiply all overlap matrices with the weight matrix
1155 				weightMatrix.emul(ovlp);
1156 			}
1157 
1158 			typedef typename TensorTypeData<T>::float_scalar_type resultT;
1159 			const resultT overlap=std::abs(weightMatrix.sum());
1160 			return sqrt(overlap);
1161 		}
1162 
1163 		/// scale this by a number
scale(const double & fac)1164 		void scale(const double& fac) {weights_.scale(fac);};
1165 
scale(const double_complex & fac)1166 		void scale(const double_complex& fac) {
1167 			MADNESS_EXCEPTION("no complex scaling in SRConf",1);
1168 		}
1169 
1170 		/// check compatibility
compatible(const SRConf & lhs,const SRConf & rhs)1171 		friend bool compatible(const SRConf& lhs, const SRConf& rhs) {
1172 			return ((lhs.dim()==rhs.dim()) and (lhs.dim_per_vector()==rhs.dim_per_vector()));
1173 		}
1174 
1175 	    /// \code
1176 		///     result(i,j,k,...) <-- sum(i',j', k',...) t(i',j',k',...)  c(i',i) c(j',j) c(k',k) ...
1177 		/// \endcode
1178 		///
1179 		/// The input dimensions of \c t must all be the same .
transform(const Tensor<T> & c)1180 		SRConf<T> transform(const Tensor<T>& c) const {
1181 
1182 			// fast return if possible
1183 			if (this->has_no_data()) {
1184 				return copy(*this);
1185 			}
1186 
1187 			// fast return for full rank tensor
1188 			if (type()==TT_FULL) {
1189 				return SRConf<T> (madness::transform(this->vector_[0],c));
1190 			}
1191 
1192 			// copying shrinks the vectors to (r,k,k,..)
1193 			SRConf<T> result=copy(*this);
1194 
1195 			// make sure this is not flattened
1196 			MADNESS_ASSERT(this->has_structure());
1197 
1198 			// these two loops go over all physical dimensions (dim = dim_eff * merged_dim)
1199 			for (unsigned int idim=0; idim<this->dim_eff(); idim++) {
1200 				for (unsigned int jdim=1; jdim<this->ref_vector(idim).ndim(); jdim++) {
1201 
1202 					// note: tricky ordering (jdim is missing): this is actually correct!
1203 					// note: no slicing necessary, since we've copied this to result (incl shrinking)
1204 //					result.refVector_struct(idim)=madness::inner(result.refVector_struct(idim),c,1,0);
1205 					result.ref_vector(idim)=madness::inner(result.ref_vector(idim),c,1,0);
1206 
1207 				}
1208 			}
1209             MADNESS_ASSERT(result.has_structure());
1210 			return result;
1211 		}
1212 
1213 	    /// \code
1214 		///     result(i,j,k,...) <-- sum(i',j', k',...) t(i',j',k',...)  c(i',i) c(j',j) c(k',k) ...
1215 		/// \endcode
1216 		///
1217 		/// The input dimensions of \c t must all be the same .
1218 		template<typename Q>
general_transform(const Tensor<Q> c[])1219 		SRConf<TENSOR_RESULT_TYPE(T,Q) > general_transform(const Tensor<Q> c[]) const {
1220 
1221 			// fast return if possible
1222 			if (this->has_no_data()) return SRConf<T>(copy(*this));
1223 			if (type()==TT_FULL) {
1224 				return SRConf<T> (madness::general_transform(this->vector_[0],c));
1225 			}
1226 
1227 			// copying shrinks the vectors to (r,k,k,..)
1228 			SRConf<T> result=copy(*this);
1229 
1230 			// make sure this is not flattened
1231 			if (not this->has_structure()) {
1232 				print("no structure!");
1233 			}
1234 			MADNESS_ASSERT(this->has_structure());
1235 
1236 			long i=0;
1237 			// these two loops go over all physical dimensions (dim = dim_eff * merged_dim)
1238 			for (unsigned int idim=0; idim<this->dim_eff(); idim++) {
1239 				for (unsigned int jdim=1; jdim<this->ref_vector(idim).ndim(); jdim++) {
1240 
1241 					// note tricky ordering (jdim is missing): this is actually correct!
1242 					// note: no slicing necessary, since we've copied this to result (incl shrinking)
1243 					result.ref_vector(idim)=madness::inner(result.ref_vector(idim),c[i],1,0);
1244 					i++;
1245 
1246 				}
1247 			}
1248             MADNESS_ASSERT(result.has_structure());
1249 			return result;
1250 		}
1251 
transform_dir(const Tensor<T> & c,const int & axis)1252 		SRConf<T> transform_dir(const Tensor<T>& c, const int& axis) const {
1253 
1254 			if (this->has_no_data()) {
1255 				return SRConf<T>(copy(*this));
1256 			}
1257 
1258 			// fast return for full rank tensor
1259 			if (type()==TT_FULL) {
1260 				return SRConf<T> (madness::transform_dir(this->vector_[0],c,axis));
1261 			}
1262 
1263 			// copying shrinks the vectors to (r,k,k,..)
1264 			SRConf<T> result=copy(*this);
1265 
1266 			// only a matrix is allowed for c
1267 			MADNESS_ASSERT(c.ndim()==2);
1268 
1269 			// make sure this is not flattened
1270 			MADNESS_ASSERT(this->has_structure());
1271 
1272 			// compute idim for accessing the vector_, and the dimension inside vector_
1273 			// the +1 on jdim for the rank
1274 			const long idim=axis/this->dim_per_vector();
1275 			const long jdim=axis%this->dim_per_vector()+1;
1276 
1277 			// note: no slicing necessary, since we've copied this to result (incl shrinking)
1278 			result.ref_vector(idim)=madness::transform_dir(this->ref_vector(idim),c,jdim);
1279             MADNESS_ASSERT(result.has_structure());
1280 
1281 			return result;
1282 		}
1283 
1284 	};
1285 
1286 	/// sophisticated version of ortho2
1287 
1288 	/// after calling this we will have an optimally rank-reduced representation
1289 	/// with the left and right subspaces being bi-orthogonal and normalized;
1290 	/// outline of the algorithm:
1291 	///  - canonical orthogonalization of the subspaces (screen for small eigenvalues)
1292 	///  - SVD of the modified overlap (incorporates the roots of eigenvalues)
1293 	/// operation count is O(kr^2 + r^3)
1294 	///
1295 	/// @param[in,out]	x normalized left subspace
1296 	/// @param[in,out]	y normalize right subspace
1297 	/// @param[in,out]	weights weights
1298 	/// @param[in]		thresh	truncation threshold
1299 	template<typename T>
ortho3(Tensor<T> & x,Tensor<T> & y,Tensor<double> & weights,const double & thresh)1300 	void ortho3(Tensor<T>& x, Tensor<T>& y, Tensor<double>& weights, const double& thresh) {
1301 
1302 #ifdef BENCH
1303 		double cpu0=wall_time();
1304 #endif
1305 		typedef Tensor<T> tensorT;
1306 
1307 		const long rank=x.dim(0);
1308 		const double w_max=weights.absmax()*rank;		// max Frobenius norm
1309 
1310 
1311 		// overlap of 1 and 2
1312 		tensorT S1=inner(x,x,1,1);
1313 		tensorT S2=inner(y,y,1,1);	// 0.5 / 2.1
1314 #ifdef BENCH
1315 		double cpu1=wall_time();
1316 		SRConf<T>::time(1)+=(cpu1-cpu0);
1317 #endif
1318 
1319 //	    print("norm(S1)",S1.normf());
1320 //	    print("norm(S2)",S2.normf());
1321 
1322 		// diagonalize
1323 		tensorT U1, U2;
1324 		Tensor<double> e1, e2;
1325 	    syev(S1,U1,e1);
1326 	    syev(S2,U2,e2);										// 2.3 / 4.0
1327 #ifdef BENCH
1328 		double cpu3=wall_time();
1329 		SRConf<T>::time(3)+=cpu3-cpu1;
1330 #endif
1331 
1332 	    const double e1_max=e1.absmax();
1333 	    const double e2_max=e2.absmax();
1334 
1335 		// fast return if possible
1336 		if ((e1_max*w_max<thresh) or (e2_max*w_max<thresh)) {
1337 			x.clear();
1338 			y.clear();
1339 			weights.clear();
1340 			return;
1341 		}
1342 
1343 	    // remove small negative eigenvalues
1344 	    e1.screen(1.e-13);
1345 	    e2.screen(1.e-13);
1346 	    Tensor<double> sqrt_e1(rank), sqrt_e2(rank);
1347 
1348 
1349 	    // shrink U1, U2
1350 	    int lo1=0;
1351 	    int lo2=0;
1352 	    for (unsigned int r=0; r<rank; r++) {
1353 	    	if (e1(r)*w_max<thresh) lo1=r+1;
1354 	    	if (e2(r)*w_max<thresh) lo2=r+1;
1355 	    	sqrt_e1(r)=sqrt(std::abs(e1(r)));
1356 	    	sqrt_e2(r)=sqrt(std::abs(e2(r)));
1357 	    }
1358 
1359 	    U1=U1(Slice(_),Slice(lo1,-1));
1360 	    U2=U2(Slice(_),Slice(lo2,-1));
1361 	    sqrt_e1=sqrt_e1(Slice(lo1,-1));
1362 	    sqrt_e2=sqrt_e2(Slice(lo2,-1));
1363 	    unsigned int rank1=rank-lo1;
1364 	    unsigned int rank2=rank-lo2;						// 0.0 / 0.0
1365 
1366 
1367 	    MADNESS_ASSERT(sqrt_e1.size()==rank1);
1368 	    MADNESS_ASSERT(sqrt_e2.size()==rank2);
1369 #ifdef BENCH
1370 		double cpu4=wall_time();
1371 		SRConf<T>::time(4)+=cpu4-cpu3;
1372 #endif
1373 
1374 
1375 	    // set up overlap M; include X+
1376 	    tensorT M(rank1,rank2);
1377 	    for (unsigned int i=0; i<rank1; i++) {
1378 	    	for (unsigned int j=0; j<rank2; j++) {
1379 	    		for (unsigned int r=0; r<rank; r++) {
1380 		    		M(i,j)+=U1(r,i)*sqrt_e1(i)*weights(r)*U2(r,j) * sqrt_e2(j);
1381 //			    		M(i,j)+=U1(r,i)*weights(r)*U2(r,j);
1382 	    		}
1383 	    	}
1384 	    }
1385 
1386 
1387 	    // include X-
1388     	for (unsigned int r=0; r<rank1; r++) {
1389     		double fac=1.0/sqrt_e1(r);
1390     		for (unsigned int t=0; t<rank; t++) {
1391 	    		U1(t,r)*=fac;
1392 //	    		if (sqrt_e1(r)<thresh) throw;
1393     		}
1394     	}
1395 
1396 	   	for (unsigned int r=0; r<rank2; r++) {
1397     		double fac=1.0/sqrt_e2(r);
1398     		for (unsigned int t=0; t<rank; t++) {
1399 	    		U2(t,r)*=fac;
1400 //	    		if (sqrt_e2(r)<thresh) throw;
1401 	    	}
1402 	    }													// 0.2 / 0.6
1403 #ifdef BENCH
1404 		double cpu5=wall_time();
1405 		SRConf<T>::time(5)+=cpu5-cpu4;
1406 #endif
1407 
1408 	    // decompose M
1409 		tensorT Up,VTp;
1410 		Tensor<double> Sp;
1411 		svd(M,Up,Sp,VTp);									// 1.5 / 3.0
1412 #ifdef BENCH
1413 		double cpu6=wall_time();
1414 		SRConf<T>::time(6)+=cpu6-cpu5;
1415 #endif
1416 
1417 		// make transformation matrices
1418 		Up=inner(Up,U1,0,1);
1419 		VTp=inner(VTp,U2,1,1);
1420 
1421 
1422 
1423 //		// find the maximal singular value that's supposed to contribute
1424 //		// singular values are ordered (largest first)
1425 //		double residual=0.0;
1426 //		long i;
1427 //		for (i=Sp.dim(0)-1; i>=0; i--) {
1428 //			residual+=Sp(i)*Sp(i);
1429 //			if (residual>thresh*thresh) break;
1430 //		}
1431 		long i=SRConf<T>::max_sigma(thresh,Sp.dim(0),Sp);
1432 
1433 #ifdef BENCH
1434 		double cpu7=wall_time();
1435 		SRConf<T>::time(7)+=cpu7-cpu6;
1436 #endif
1437 
1438 //		i=std::min(i,long(0));
1439 
1440 	    Up=Up(Slice(0,i),Slice(_));
1441 	    VTp=VTp(Slice(0,i),Slice(_));
1442 
1443 
1444 		// convert SVD output to our convention
1445 		if (i>=0) {
1446 
1447 			// transform 1 and 2
1448 		    x=inner(Up,x,1,0);
1449 		    y=inner(VTp,y,1,0);				// 0.5 / 2.5
1450 		    weights=Sp(Slice(0,i));
1451 
1452 		} else {
1453 			x.clear();
1454 			y.clear();
1455 			weights.clear();
1456 		}
1457 #ifdef BENCH
1458 		double cpu8=wall_time();
1459 		SRConf<T>::time(8)+=cpu8-cpu7;
1460 		SRConf<T>::time(0)+=cpu8-cpu0;
1461 #endif
1462 		return;
1463 	}
1464 
1465 	/// specialized version of ortho3
1466 
1467 	/// does the same as ortho3, but takes two bi-orthonormal configs as input
1468 	/// and saves on the inner product. Result will be written onto the first config
1469 	///
1470 	/// @param[in,out]	x1	left subspace, will hold the result on exit
1471 	/// @param[in,out]	y1	right subspace, will hold the result on exit
1472 	/// @param[in]		x2	left subspace, will be accumulated onto x1
1473 	/// @param[in]		y2	right subspace, will be accumulated onto y1
1474 	template<typename T>
ortho5(Tensor<T> & x1,Tensor<T> & y1,Tensor<double> & w1,const Tensor<T> & x2,const Tensor<T> & y2,const Tensor<double> & w2,const double & thresh)1475 	void ortho5(Tensor<T>& x1, Tensor<T>& y1, Tensor<double>& w1,
1476 				const Tensor<T>& x2, const Tensor<T>& y2, const Tensor<double>& w2,
1477 				const double& thresh) {
1478 
1479 #ifdef BENCH
1480 		double cpu0=wall_time();
1481 #endif
1482 		typedef Tensor<T> tensorT;
1483 
1484 		const long rank1=x1.dim(0);
1485 		const long rank2=x2.dim(0);
1486 		const long rank=rank1+rank2;
1487 
1488 		// for convenience: blocks of the matrices
1489 		const Slice s0(0,rank1-1), s1(rank1,rank-1);
1490 
1491 		const double w_max=std::max(w1.absmax(),w2.absmax());
1492 		const double norm_max=w_max*rank;		// max Frobenius norm
1493 
1494 		// the overlap between 1 and 2;
1495 		// the overlap of 1 and 1, and 2 and 2 is assumed to be the identity matrix
1496 		tensorT Sx12=inner(x1,x2,1,1);
1497 		tensorT Sy12=inner(y1,y2,1,1);
1498 #ifdef BENCH
1499 		double cpu1=wall_time();
1500 		SRConf<T>::time(11)+=cpu1-cpu0;
1501 #endif
1502 
1503 		tensorT Sx(rank,rank);
1504 		tensorT Sy(rank,rank);
1505 
1506 		// the identity matrix (half of it)
1507 		for (long i=0; i<rank; i++) {
1508 			Sx(i,i)=0.5;
1509 			Sy(i,i)=0.5;
1510 		}
1511 		Sx(s0,s1)=Sx12;
1512 		Sy(s0,s1)=Sy12;
1513 		Sx+=transpose(Sx);
1514 		Sy+=transpose(Sy);
1515 
1516 		// overlap of 1 and 2
1517 //		tensorT S1=inner(x,x,1,1);
1518 //		tensorT S2=inner(y,y,1,1);	// 0.5 / 2.1
1519 #ifdef BENCH
1520 		double cpu2=wall_time();
1521 		SRConf<T>::time(12)+=cpu2-cpu1;
1522 #endif
1523 
1524 		// diagonalize
1525 		tensorT U1, U2;
1526 		Tensor<double> e1, e2;
1527 	    syev(Sx,U1,e1);
1528 	    syev(Sy,U2,e2);										// 2.3 / 4.0
1529 #ifdef BENCH
1530 		double cpu3=wall_time();
1531 		SRConf<T>::time(13)+=cpu3-cpu2;
1532 #endif
1533 
1534 //	    print("norm(Sx)",Sx.normf());
1535 //	    print("norm(Sy)",Sy.normf());
1536 
1537 	    const double e1_max=e1.absmax();
1538 	    const double e2_max=e2.absmax();
1539 
1540 		// fast return if possible
1541 		if ((e1_max*norm_max<thresh) or (e2_max*norm_max<thresh)) {
1542 			x1.clear();
1543 			y1.clear();
1544 			w1.clear();
1545 			return;
1546 		}
1547 
1548 	    // remove small negative eigenvalues
1549 	    e1.screen(1.e-13);
1550 	    e2.screen(1.e-13);
1551 	    Tensor<double> sqrt_e1(rank), sqrt_e2(rank);
1552 
1553 
1554 	    // shrink U1, U2
1555 	    int lo1=0;
1556 	    int lo2=0;
1557 	    for (unsigned int r=0; r<rank; r++) {
1558 	    	if (e1(r)<thresh/norm_max) lo1=r+1;
1559 	    	else sqrt_e1(r)=sqrt(std::abs(e1(r)));
1560 	    	if (e2(r)<thresh/norm_max) lo2=r+1;
1561 	    	else sqrt_e2(r)=sqrt(std::abs(e2(r)));
1562 	    }
1563 
1564 	    U1=U1(Slice(_),Slice(lo1,-1));
1565 	    U2=U2(Slice(_),Slice(lo2,-1));
1566 	    sqrt_e1=sqrt_e1(Slice(lo1,-1));
1567 	    sqrt_e2=sqrt_e2(Slice(lo2,-1));
1568 	    unsigned int rank_x=rank-lo1;
1569 	    unsigned int rank_y=rank-lo2;						// 0.0 / 0.0
1570 
1571 
1572 //	    MADNESS_ASSERT(sqrt_e1.size()==rank_x);
1573 //	    MADNESS_ASSERT(sqrt_e2.size()==rank_y);
1574 
1575 	    // set up overlap M; include X+
1576 
1577 //	    for (unsigned int i=0; i<rank_x; ++i) U(i,_)*=sqrt_e1(i);
1578 //	    for (unsigned int i=0; i<rank_y; ++i) U(i,_)*=sqrt_e2(i);
1579 
1580 	    tensorT UU1=copy(U1);
1581 	    for (unsigned int i=0; i<rank1; ++i) UU1(i,_)*=w1(i);
1582 	    for (unsigned int i=rank1; i<rank; ++i) UU1(i,_)*=w2(i-rank1);
1583 
1584 	    tensorT M=inner(UU1,U2,0,0);
1585 	    tensorT ee=outer(sqrt_e1,sqrt_e2);
1586 	    M.emul(ee);
1587 
1588 	    // include X-
1589     	for (unsigned int r=0; r<rank_x; r++) {
1590     		double fac=1.0/sqrt_e1(r);
1591     		U1(_,r)*=fac;
1592 //    		for (unsigned int t=0; t<rank; t++) {
1593 //	    		U1(t,r)*=fac;
1594 ////	    		if (sqrt_e1(r)<thresh) throw;
1595 //    		}
1596     	}
1597 
1598 	   	for (unsigned int r=0; r<rank_y; r++) {
1599     		double fac=1.0/sqrt_e2(r);
1600     		U2(_,r)*=fac;
1601 //    		for (unsigned int t=0; t<rank; t++) {
1602 //	    		U2(t,r)*=fac;
1603 ////	    		if (sqrt_e2(r)<thresh) throw;
1604 //	    	}
1605 	    }													// 0.2 / 0.6
1606 #ifdef BENCH
1607 		double cpu4=wall_time();
1608 		SRConf<T>::time(14)+=cpu4-cpu3;
1609 #endif
1610 
1611 
1612 	    // decompose M
1613 		tensorT Up,VTp;
1614 		Tensor<double> Sp;
1615 		svd(M,Up,Sp,VTp);									// 1.5 / 3.0
1616 #ifdef BENCH
1617 		double cpu5=wall_time();
1618 		SRConf<T>::time(15)+=cpu5-cpu4;
1619 #endif
1620 
1621 		// make transformation matrices
1622 		Up=inner(Up,U1,0,1);
1623 		VTp=inner(VTp,U2,1,1);
1624 #ifdef BENCH
1625 		double cpu6=wall_time();
1626 		SRConf<T>::time(16)+=cpu6-cpu5;
1627 #endif
1628 
1629 		// find the maximal singular value that's supposed to contribute
1630 		// singular values are ordered (largest first)
1631 		double residual=0.0;
1632 		long i;
1633 		for (i=Sp.dim(0)-1; i>=0; i--) {
1634 			residual+=Sp(i)*Sp(i);
1635 			if (residual>thresh*thresh) break;
1636 		}
1637 
1638 		// convert SVD output to our convention
1639 		if (i>=0) {
1640 
1641 			// make it contiguous
1642 		    tensorT Up1=transpose(Up(Slice(0,i),s0));
1643 		    tensorT Up2=transpose(Up(Slice(0,i),s1));
1644 		    tensorT VTp1=transpose(VTp(Slice(0,i),s0));
1645 		    tensorT VTp2=transpose(VTp(Slice(0,i),s1));
1646 
1647 			// transform 1 and 2
1648 		    x1=inner(Up1,x1,0,0);
1649 		    inner_result(Up2,x2,0,0,x1);
1650 		    y1=inner(VTp1,y1,0,0);
1651 		    inner_result(VTp2,y2,0,0,y1);
1652 		    w1=Sp(Slice(0,i));
1653 
1654 		} else {
1655 			x1.clear();
1656 			y1.clear();
1657 			w1.clear();
1658 		}
1659 #ifdef BENCH
1660 		double cpu7=wall_time();
1661 		SRConf<T>::time(17)+=cpu7-cpu6;
1662 		SRConf<T>::time(10)+=cpu7-cpu0;
1663 #endif
1664 		return;
1665 	}
1666 
1667 	template<typename T>
1668 	static inline
1669 	std::ostream& operator<<(std::ostream& s, const SRConf<T>& sr) {
1670 
1671 		s << "dim_          " << sr.dim_ << "\n";;
1672 		s << "rank_         " << sr.rank_ << "\n";;
1673 		s << "maxk_         " << sr.maxk_ << "\n";;
1674 		s << "vector_.size()" << sr.vector_.size() << "\n";
1675 		s << "has_data()    " << sr.has_data() << "\n";
1676 		s << "TensorType    " << sr.type() << "\n\n";
1677 		return s;
1678 	}
1679 }
1680 
1681 #endif /* SRCONF_H_ */
1682