1 /*
2   This file is part of MADNESS.
3 
4   Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10 
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   GNU General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20   For more information please contact:
21 
22   Robert J. Harrison
23   Oak Ridge National Laboratory
24   One Bethel Valley Road
25   P.O. Box 2008, MS-6367
26 
27   email: harrisonrj@ornl.gov
28   tel:   865-241-3937
29   fax:   865-572-0680
30 
31   $Id$
32 */
33 #ifndef MADNESS_MRA_VMRA_H__INCLUDED
34 #define MADNESS_MRA_VMRA_H__INCLUDED
35 
36 /*!
37 	\file vmra.h
38 	\brief Defines operations on vectors of Functions
39 	\ingroup mra
40 
41 	This file defines a number of operations on vectors of functions.
42 	Assume v is a vector of NDIM-D functions of a certain type.
43 
44 
45 	Operations on array of functions
46 
47 	*) copying: deep copying of vectors of functions to vector of functions
48 	\code
49 	vector2 = copy(world, vector1,fence);
50 	\endcode
51 
52 	*) compress: convert multiwavelet representation to legendre representation
53 	\code
54 	compress(world, vector, fence);
55 	\endcode
56 
57 	*) reconstruct: convert representation to multiwavelets
58 	\code
59 	reconstruct(world, vector, fence);
60 	\endcode
61 
62 	*) nonstandard: convert to non-standard form
63 	\code
64 	nonstandard(world, v, fence);
65 	\endcode
66 
67 	*) standard: convert to standard form
68 	\code
69 	standard(world, v, fence);
70 	\endcode
71 
72 	*) truncate: truncating vectors of functions to desired precision
73 	\code
74 	truncate(world, v, tolerance, fence);
75 	\endcode
76 
77 
78 	*) zero function: create a vector of zero functions of length n
79 	\code
80 	v=zero(world, n);
81 	\endcode
82 
83 	*) transform: transform a representation from one basis to another
84 	\code
85 	transform(world, vector, tensor, tolerance, fence )
86 	\endcode
87 
88 	Setting thresh-hold for precision
89 
90 	*) set_thresh: setting a finite thresh-hold for a vector of functions
91 	\code
92 	void set_thresh(World& world, std::vector< Function<T,NDIM> >& v, double thresh, bool fence=true);
93 	\endcode
94 
95 	Arithmetic Operations on arrays of functions
96 
97 	*) conjugation: conjugate a vector of complex functions
98 
99 	*) add
100 	*) sub
101 	*) mul
102 	   - mul_sparse
103 	*) square
104 	*) gaxpy
105 	*) apply
106 
107 	Norms, inner-products, blas-1 like operations on vectors of functions
108 
109 	*) inner
110 	*) matrix_inner
111 	*) norm_tree
112 	*) normalize
113 	*) norm2
114 	    - norm2s
115 	*) scale(world, v, alpha);
116 
117 
118 
119 
120 */
121 
122 #include <madness/mra/mra.h>
123 #include <madness/mra/derivative.h>
124 #include <madness/tensor/distributed_matrix.h>
125 #include <cstdio>
126 
127 namespace madness {
128 
129 
130 
131     /// Compress a vector of functions
132     template <typename T, std::size_t NDIM>
133     void compress(World& world,
134                   const std::vector< Function<T,NDIM> >& v,
135                   bool fence=true) {
136 
137         PROFILE_BLOCK(Vcompress);
138         bool must_fence = false;
139         for (unsigned int i=0; i<v.size(); ++i) {
140             if (!v[i].is_compressed()) {
141                 v[i].compress(false);
142                 must_fence = true;
143             }
144         }
145 
146         if (fence && must_fence) world.gop.fence();
147     }
148 
149 
150     /// Reconstruct a vector of functions
151     template <typename T, std::size_t NDIM>
152     void reconstruct(World& world,
153                      const std::vector< Function<T,NDIM> >& v,
154                      bool fence=true) {
155         PROFILE_BLOCK(Vreconstruct);
156         bool must_fence = false;
157         for (unsigned int i=0; i<v.size(); ++i) {
158             if (v[i].is_compressed()) {
159                 v[i].reconstruct(false);
160                 must_fence = true;
161             }
162         }
163 
164         if (fence && must_fence) world.gop.fence();
165     }
166 
167     /// refine the functions according to the autorefine criteria
168     template <typename T, std::size_t NDIM>
169     void refine(World& world, const std::vector<Function<T,NDIM> >& vf,
170             bool fence=true) {
171         for (const auto& f : vf) f.refine(false);
172         if (fence) world.gop.fence();
173     }
174 
175     /// refine all functions to a common (finest) level
176 
177     /// if functions are not initialized (impl==NULL) they are ignored
178     template <typename T, std::size_t NDIM>
179     void refine_to_common_level(World& world, std::vector<Function<T,NDIM> >& vf,
180             bool fence=true) {
181 
182         reconstruct(world,vf);
183         Key<NDIM> key0(0, Vector<Translation, NDIM> (0));
184         std::vector<FunctionImpl<T,NDIM>*> v_ptr;
185 
186         // push initialized function pointers into the vector v_ptr
187         for (unsigned int i=0; i<vf.size(); ++i) {
188             if (vf[i].is_initialized()) v_ptr.push_back(vf[i].get_impl().get());
189         }
190 
191         // sort and remove duplicates to not confuse the refining function
192         std::sort(v_ptr.begin(),v_ptr.end());
193         typename std::vector<FunctionImpl<T, NDIM>*>::iterator it;
194         it = std::unique(v_ptr.begin(), v_ptr.end());
195         v_ptr.resize( std::distance(v_ptr.begin(),it) );
196 
197         std::vector< Tensor<T> > c(v_ptr.size());
198         v_ptr[0]->refine_to_common_level(v_ptr, c, key0);
199         if (fence) v_ptr[0]->world.gop.fence();
200         if (VERIFY_TREE)
201             for (unsigned int i=0; i<vf.size(); i++) vf[i].verify_tree();
202     }
203 
204     /// Generates non-standard form of a vector of functions
205     template <typename T, std::size_t NDIM>
206     void nonstandard(World& world,
207                      std::vector< Function<T,NDIM> >& v,
208                      bool fence=true) {
209         PROFILE_BLOCK(Vnonstandard);
210         reconstruct(world, v);
211         for (unsigned int i=0; i<v.size(); ++i) {
212             v[i].nonstandard(false,false);
213         }
214         if (fence) world.gop.fence();
215     }
216 
217 
218     /// Generates standard form of a vector of functions
219     template <typename T, std::size_t NDIM>
220     void standard(World& world,
221                   std::vector< Function<T,NDIM> >& v,
222                   bool fence=true) {
223         PROFILE_BLOCK(Vstandard);
224         for (unsigned int i=0; i<v.size(); ++i) {
225             v[i].standard(false);
226         }
227         if (fence) world.gop.fence();
228     }
229 
230 
231     /// Truncates a vector of functions
232     template <typename T, std::size_t NDIM>
233     void truncate(World& world,
234                   std::vector< Function<T,NDIM> >& v,
235                   double tol=0.0,
236                   bool fence=true) {
237         PROFILE_BLOCK(Vtruncate);
238 
239         compress(world, v);
240 
241         for (unsigned int i=0; i<v.size(); ++i) {
242             v[i].truncate(tol, false);
243         }
244 
245         if (fence) world.gop.fence();
246     }
247 
248     /// Truncates a vector of functions
249 
250     /// @return the truncated vector for chaining
251     template <typename T, std::size_t NDIM>
252     std::vector< Function<T,NDIM> > truncate(std::vector< Function<T,NDIM> > v,
253                   double tol=0.0, bool fence=true) {
254         if (v.size()>0) truncate(v[0].world(),v,tol,fence);
255         return v;
256     }
257 
258 
259     /// Applies a derivative operator to a vector of functions
260     template <typename T, std::size_t NDIM>
261     std::vector< Function<T,NDIM> >
262     apply(World& world,
263           const Derivative<T,NDIM>& D,
264           const std::vector< Function<T,NDIM> >& v,
265           bool fence=true)
266     {
267         reconstruct(world, v);
268         std::vector< Function<T,NDIM> > df(v.size());
269         for (unsigned int i=0; i<v.size(); ++i) {
270             df[i] = D(v[i],false);
271         }
272         if (fence) world.gop.fence();
273         return df;
274     }
275 
276     /// Generates a vector of zero functions (reconstructed)
277     template <typename T, std::size_t NDIM>
278     std::vector< Function<T,NDIM> >
279     zero_functions(World& world, int n, bool fence=true) {
280         PROFILE_BLOCK(Vzero_functions);
281         std::vector< Function<T,NDIM> > r(n);
282         for (int i=0; i<n; ++i)
283   	    r[i] = Function<T,NDIM>(FunctionFactory<T,NDIM>(world).fence(false));
284 
285 	if (n && fence) world.gop.fence();
286 
287         return r;
288     }
289 
290     /// Generates a vector of zero functions (compressed)
291     template <typename T, std::size_t NDIM>
292     std::vector< Function<T,NDIM> >
293     zero_functions_compressed(World& world, int n, bool fence=true) {
294         PROFILE_BLOCK(Vzero_functions);
295         std::vector< Function<T,NDIM> > r(n);
296         for (int i=0; i<n; ++i)
297   	    r[i] = Function<T,NDIM>(FunctionFactory<T,NDIM>(world).fence(false).compressed(true).initial_level(1));
298 
299 	if (n && fence) world.gop.fence();
300 
301         return r;
302     }
303 
304 
305     /// Transforms a vector of functions according to new[i] = sum[j] old[j]*c[j,i]
306 
307     /// Uses sparsity in the transformation matrix --- set small elements to
308     /// zero to take advantage of this.
309     template <typename T, typename R, std::size_t NDIM>
310     std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> >
311     transform(World& world,
312               const std::vector< Function<T,NDIM> >& v,
313               const Tensor<R>& c,
314               bool fence=true) {
315 
316         PROFILE_BLOCK(Vtransformsp);
317         typedef TENSOR_RESULT_TYPE(T,R) resultT;
318         int n = v.size();  // n is the old dimension
319         int m = c.dim(1);  // m is the new dimension
320         MADNESS_ASSERT(n==c.dim(0));
321 
322         std::vector< Function<resultT,NDIM> > vc = zero_functions_compressed<resultT,NDIM>(world, m);
323         compress(world, v);
324 
325         for (int i=0; i<m; ++i) {
326             for (int j=0; j<n; ++j) {
327                 if (c(j,i) != R(0.0)) vc[i].gaxpy(1.0,v[j],c(j,i),false);
328             }
329         }
330 
331         if (fence) world.gop.fence();
332         return vc;
333     }
334 
335 
336     template <typename L, typename R, std::size_t NDIM>
337     std::vector< Function<TENSOR_RESULT_TYPE(L,R),NDIM> >
transform(World & world,const std::vector<Function<L,NDIM>> & v,const Tensor<R> & c,double tol,bool fence)338     transform(World& world,  const std::vector< Function<L,NDIM> >& v,
339             const Tensor<R>& c, double tol, bool fence) {
340         PROFILE_BLOCK(Vtransform);
341         MADNESS_ASSERT(v.size() == (unsigned int)(c.dim(0)));
342 
343         std::vector< Function<TENSOR_RESULT_TYPE(L,R),NDIM> > vresult
344             = zero_functions_compressed<TENSOR_RESULT_TYPE(L,R),NDIM>(world, c.dim(1));
345 
346         compress(world, v, true);
347         vresult[0].vtransform(v, c, vresult, tol, fence);
348         return vresult;
349     }
350 
351     template <typename T, typename R, std::size_t NDIM>
352     std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> >
353     transform(World& world,
354               const std::vector< Function<T,NDIM> >& v,
355               const DistributedMatrix<R>& c,
356               bool fence=true) {
357         PROFILE_FUNC;
358 
359         typedef TENSOR_RESULT_TYPE(T,R) resultT;
360         long n = v.size();    // n is the old dimension
361         long m = c.rowdim();  // m is the new dimension
362         MADNESS_ASSERT(n==c.coldim());
363 
364         // new(i) = sum(j) old(j) c(j,i)
365 
366         Tensor<T> tmp(n,m);
367         c.copy_to_replicated(tmp); // for debugging
368         tmp = transpose(tmp);
369 
370         std::vector< Function<resultT,NDIM> > vc = zero_functions_compressed<resultT,NDIM>(world, m);
371         compress(world, v);
372 
373         for (int i=0; i<m; ++i) {
374             for (int j=0; j<n; ++j) {
375                 if (tmp(j,i) != R(0.0)) vc[i].gaxpy(1.0,v[j],tmp(j,i),false);
376             }
377         }
378 
379         if (fence) world.gop.fence();
380         return vc;
381     }
382 
383 
384     /// Scales inplace a vector of functions by distinct values
385     template <typename T, typename Q, std::size_t NDIM>
386     void scale(World& world,
387                std::vector< Function<T,NDIM> >& v,
388                const std::vector<Q>& factors,
389                bool fence=true) {
390         PROFILE_BLOCK(Vscale);
391         for (unsigned int i=0; i<v.size(); ++i) v[i].scale(factors[i],false);
392         if (fence) world.gop.fence();
393     }
394 
395     /// Scales inplace a vector of functions by the same
396     template <typename T, typename Q, std::size_t NDIM>
397     void scale(World& world,
398                std::vector< Function<T,NDIM> >& v,
399 	       const Q factor,
400                bool fence=true) {
401         PROFILE_BLOCK(Vscale);
402         for (unsigned int i=0; i<v.size(); ++i) v[i].scale(factor,false);
403         if (fence) world.gop.fence();
404     }
405 
406     /// Computes the 2-norms of a vector of functions
407     template <typename T, std::size_t NDIM>
norm2s(World & world,const std::vector<Function<T,NDIM>> & v)408     std::vector<double> norm2s(World& world,
409                               const std::vector< Function<T,NDIM> >& v) {
410         PROFILE_BLOCK(Vnorm2);
411         std::vector<double> norms(v.size());
412         for (unsigned int i=0; i<v.size(); ++i) norms[i] = v[i].norm2sq_local();
413         world.gop.sum(&norms[0], norms.size());
414         for (unsigned int i=0; i<v.size(); ++i) norms[i] = sqrt(norms[i]);
415         world.gop.fence();
416         return norms;
417     }
418 
419     /// Computes the 2-norm of a vector of functions
420     template <typename T, std::size_t NDIM>
norm2(World & world,const std::vector<Function<T,NDIM>> & v)421     double norm2(World& world,
422                               const std::vector< Function<T,NDIM> >& v) {
423         PROFILE_BLOCK(Vnorm2);
424         std::vector<double> norms(v.size());
425         for (unsigned int i=0; i<v.size(); ++i) norms[i] = v[i].norm2sq_local();
426         world.gop.sum(&norms[0], norms.size());
427         for (unsigned int i=1; i<v.size(); ++i) norms[0] += norms[i];
428         world.gop.fence();
429         return sqrt(norms[0]);
430     }
431 
conj(double x)432     inline double conj(double x) {
433         return x;
434     }
435 
conj(float x)436     inline double conj(float x) {
437         return x;
438     }
439 
440 // !!! FIXME: this task is broken because FunctionImpl::inner_local forces a
441 // future on return from WorldTaskQueue::reduce, which will causes a deadlock if
442 // run inside a task. This behavior must be changed before this task can be used
443 // again.
444 //
445 //    template <typename T, typename R, std::size_t NDIM>
446 //    struct MatrixInnerTask : public TaskInterface {
447 //        Tensor<TENSOR_RESULT_TYPE(T,R)> result; // Must be a copy
448 //        const Function<T,NDIM>& f;
449 //        const std::vector< Function<R,NDIM> >& g;
450 //        long jtop;
451 //
452 //        MatrixInnerTask(const Tensor<TENSOR_RESULT_TYPE(T,R)>& result,
453 //                        const Function<T,NDIM>& f,
454 //                        const std::vector< Function<R,NDIM> >& g,
455 //                        long jtop)
456 //                : result(result), f(f), g(g), jtop(jtop) {}
457 //
458 //        void run(World& world) {
459 //            for (long j=0; j<jtop; ++j) {
460 //                result(j) = f.inner_local(g[j]);
461 //            }
462 //        }
463 //
464 //    private:
465 //        /// Get the task id
466 //
467 //        /// \param id The id to set for this task
468 //        virtual void get_id(std::pair<void*,unsigned short>& id) const {
469 //            PoolTaskInterface::make_id(id, *this);
470 //        }
471 //    }; // struct MatrixInnerTask
472 
473 
474 
475     template <typename T, std::size_t NDIM>
476     DistributedMatrix<T> matrix_inner(const DistributedMatrixDistribution& d,
477                                       const std::vector< Function<T,NDIM> >& f,
478                                       const std::vector< Function<T,NDIM> >& g,
479                                       bool sym=false)
480     {
481         PROFILE_FUNC;
482         DistributedMatrix<T> A(d);
483         const int64_t n = A.coldim();
484         const int64_t m = A.rowdim();
485         MADNESS_ASSERT(int64_t(f.size()) == n && int64_t(g.size()) == m);
486 
487         // Assume we can always create an ichunk*jchunk matrix locally
488         const int ichunk = 1000;
489         const int jchunk = 1000; // 1000*1000*8 = 8 MBytes
490         for (int64_t ilo=0; ilo<n; ilo+=ichunk) {
491             int64_t ihi = std::min(ilo + ichunk, n);
492             std::vector< Function<T,NDIM> > ivec(f.begin()+ilo, f.begin()+ihi);
493             for (int64_t jlo=0; jlo<m; jlo+=jchunk) {
494                 int64_t jhi = std::min(jlo + jchunk, m);
495                 std::vector< Function<T,NDIM> > jvec(g.begin()+jlo, g.begin()+jhi);
496 
497                 Tensor<T> P = matrix_inner(A.get_world(),ivec,jvec);
498                 A.copy_from_replicated_patch(ilo, ihi-1, jlo, jhi-1, P);
499             }
500         }
501         return A;
502     }
503 
504     /// Computes the matrix inner product of two function vectors - q(i,j) = inner(f[i],g[j])
505 
506     /// For complex types symmetric is interpreted as Hermitian.
507     ///
508     /// The current parallel loop is non-optimal but functional.
509     template <typename T, typename R, std::size_t NDIM>
510     Tensor< TENSOR_RESULT_TYPE(T,R) > matrix_inner(World& world,
511                                                    const std::vector< Function<T,NDIM> >& f,
512                                                    const std::vector< Function<R,NDIM> >& g,
513                                                    bool sym=false)
514     {
515         world.gop.fence();
516         compress(world, f);
517         if ((void*)(&f) != (void*)(&g)) compress(world, g);
518 
519         std::vector<const FunctionImpl<T,NDIM>*> left(f.size());
520         std::vector<const FunctionImpl<R,NDIM>*> right(g.size());
521         for (unsigned int i=0; i<f.size(); i++) left[i] = f[i].get_impl().get();
522         for (unsigned int i=0; i<g.size(); i++) right[i]= g[i].get_impl().get();
523 
524         Tensor< TENSOR_RESULT_TYPE(T,R) > r= FunctionImpl<T,NDIM>::inner_local(left, right, sym);
525 
526         world.gop.fence();
527         world.gop.sum(r.ptr(),f.size()*g.size());
528 
529         return r;
530     }
531 
532     /// Computes the matrix inner product of two function vectors - q(i,j) = inner(f[i],g[j])
533 
534     /// For complex types symmetric is interpreted as Hermitian.
535     ///
536     /// The current parallel loop is non-optimal but functional.
537     template <typename T, typename R, std::size_t NDIM>
538     Tensor< TENSOR_RESULT_TYPE(T,R) > matrix_inner_old(World& world,
539             const std::vector< Function<T,NDIM> >& f,
540             const std::vector< Function<R,NDIM> >& g,
541             bool sym=false) {
542         PROFILE_BLOCK(Vmatrix_inner);
543         long n=f.size(), m=g.size();
544         Tensor< TENSOR_RESULT_TYPE(T,R) > r(n,m);
545         if (sym) MADNESS_ASSERT(n==m);
546 
547         world.gop.fence();
548         compress(world, f);
549         if ((void*)(&f) != (void*)(&g)) compress(world, g);
550 
551         for (long i=0; i<n; ++i) {
552             long jtop = m;
553             if (sym) jtop = i+1;
554             for (long j=0; j<jtop; ++j) {
555                 r(i,j) = f[i].inner_local(g[j]);
556                 if (sym) r(j,i) = conj(r(i,j));
557             }
558          }
559 
560 //        for (long i=n-1; i>=0; --i) {
561 //            long jtop = m;
562 //            if (sym) jtop = i+1;
563 //            world.taskq.add(new MatrixInnerTask<T,R,NDIM>(r(i,_), f[i], g, jtop));
564 //        }
565         world.gop.fence();
566         world.gop.sum(r.ptr(),n*m);
567 
568 //        if (sym) {
569 //            for (int i=0; i<n; ++i) {
570 //                for (int j=0; j<i; ++j) {
571 //                    r(j,i) = conj(r(i,j));
572 //                }
573 //            }
574 //        }
575         return r;
576     }
577 
578     /// Computes the element-wise inner product of two function vectors - q(i) = inner(f[i],g[i])
579     template <typename T, typename R, std::size_t NDIM>
inner(World & world,const std::vector<Function<T,NDIM>> & f,const std::vector<Function<R,NDIM>> & g)580     Tensor< TENSOR_RESULT_TYPE(T,R) > inner(World& world,
581                                             const std::vector< Function<T,NDIM> >& f,
582                                             const std::vector< Function<R,NDIM> >& g) {
583         PROFILE_BLOCK(Vinnervv);
584         long n=f.size(), m=g.size();
585         MADNESS_ASSERT(n==m);
586         Tensor< TENSOR_RESULT_TYPE(T,R) > r(n);
587 
588         compress(world, f);
589         compress(world, g);
590 
591         for (long i=0; i<n; ++i) {
592             r(i) = f[i].inner_local(g[i]);
593         }
594 
595         world.taskq.fence();
596         world.gop.sum(r.ptr(),n);
597         world.gop.fence();
598         return r;
599     }
600 
601 
602     /// Computes the inner product of a function with a function vector - q(i) = inner(f,g[i])
603     template <typename T, typename R, std::size_t NDIM>
inner(World & world,const Function<T,NDIM> & f,const std::vector<Function<R,NDIM>> & g)604     Tensor< TENSOR_RESULT_TYPE(T,R) > inner(World& world,
605                                             const Function<T,NDIM>& f,
606                                             const std::vector< Function<R,NDIM> >& g) {
607         PROFILE_BLOCK(Vinner);
608         long n=g.size();
609         Tensor< TENSOR_RESULT_TYPE(T,R) > r(n);
610 
611         f.compress();
612         compress(world, g);
613 
614         for (long i=0; i<n; ++i) {
615             r(i) = f.inner_local(g[i]);
616         }
617 
618         world.taskq.fence();
619         world.gop.sum(r.ptr(),n);
620         world.gop.fence();
621         return r;
622     }
623 
624     /// inner function with right signature for the nonlinear sovler
625     /// this is needed for the KAIN solvers and other functions
626     template <typename T, typename R, std::size_t NDIM>
inner(const std::vector<Function<T,NDIM>> & f,const std::vector<Function<R,NDIM>> & g)627     double inner( const std::vector< Function<T,NDIM> >& f,
628 	                                            const std::vector< Function<R,NDIM> >& g){
629       MADNESS_ASSERT(f.size()==g.size());
630       if(f.empty()) return 0.0;
631       else return inner(f[0].world(),f,g).sum();
632     }
633 
634 
635     /// Multiplies a function against a vector of functions --- q[i] = a * v[i]
636     template <typename T, typename R, std::size_t NDIM>
637     std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> >
638     mul(World& world,
639         const Function<T,NDIM>& a,
640         const std::vector< Function<R,NDIM> >& v,
641         bool fence=true) {
642         PROFILE_BLOCK(Vmul);
643         a.reconstruct(false);
644         reconstruct(world, v, false);
645         world.gop.fence();
646         return vmulXX(a, v, 0.0, fence);
647     }
648 
649     /// Multiplies a function against a vector of functions using sparsity of a and v[i] --- q[i] = a * v[i]
650     template <typename T, typename R, std::size_t NDIM>
651     std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> >
652     mul_sparse(World& world,
653                const Function<T,NDIM>& a,
654                const std::vector< Function<R,NDIM> >& v,
655                double tol,
656                bool fence=true) {
657         PROFILE_BLOCK(Vmulsp);
658         a.reconstruct(false);
659         reconstruct(world, v, false);
660         world.gop.fence();
661         for (unsigned int i=0; i<v.size(); ++i) {
662             v[i].norm_tree(false);
663         }
664         a.norm_tree();
665         return vmulXX(a, v, tol, fence);
666     }
667 
668     /// Makes the norm tree for all functions in a vector
669     template <typename T, std::size_t NDIM>
670     void norm_tree(World& world,
671                    const std::vector< Function<T,NDIM> >& v,
672                    bool fence=true)
673     {
674         PROFILE_BLOCK(Vnorm_tree);
675         for (unsigned int i=0; i<v.size(); ++i) {
676             v[i].norm_tree(false);
677         }
678         if (fence) world.gop.fence();
679     }
680 
681     /// Multiplies two vectors of functions q[i] = a[i] * b[i]
682     template <typename T, typename R, std::size_t NDIM>
683     std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> >
684     mul(World& world,
685         const std::vector< Function<T,NDIM> >& a,
686         const std::vector< Function<R,NDIM> >& b,
687         bool fence=true) {
688         PROFILE_BLOCK(Vmulvv);
689         reconstruct(world, a, true);
690         if (&a != &b) reconstruct(world, b, true);
691 
692         std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > q(a.size());
693         for (unsigned int i=0; i<a.size(); ++i) {
694             q[i] = mul(a[i], b[i], false);
695         }
696         if (fence) world.gop.fence();
697         return q;
698     }
699 
700 
701     /// Computes the square of a vector of functions --- q[i] = v[i]**2
702     template <typename T, std::size_t NDIM>
703     std::vector< Function<T,NDIM> >
704     square(World& world,
705            const std::vector< Function<T,NDIM> >& v,
706            bool fence=true) {
707         return mul<T,T,NDIM>(world, v, v, fence);
708 //         std::vector< Function<T,NDIM> > vsq(v.size());
709 //         for (unsigned int i=0; i<v.size(); ++i) {
710 //             vsq[i] = square(v[i], false);
711 //         }
712 //         if (fence) world.gop.fence();
713 //         return vsq;
714     }
715 
716 
717     /// Sets the threshold in a vector of functions
718     template <typename T, std::size_t NDIM>
719     void set_thresh(World& world, std::vector< Function<T,NDIM> >& v, double thresh, bool fence=true) {
720         for (unsigned int j=0; j<v.size(); ++j) {
721             v[j].set_thresh(thresh,false);
722         }
723         if (fence) world.gop.fence();
724     }
725 
726     /// Returns the complex conjugate of the vector of functions
727     template <typename T, std::size_t NDIM>
728     std::vector< Function<T,NDIM> >
729     conj(World& world,
730          const std::vector< Function<T,NDIM> >& v,
731          bool fence=true) {
732         PROFILE_BLOCK(Vconj);
733         std::vector< Function<T,NDIM> > r = copy(world, v); // Currently don't have oop conj
734         for (unsigned int i=0; i<v.size(); ++i) {
735             r[i].conj(false);
736         }
737         if (fence) world.gop.fence();
738         return r;
739     }
740 
741 
742     /// Returns a deep copy of a vector of functions
743     template <typename T, std::size_t NDIM>
744     std::vector< Function<T,NDIM> >
745     copy(World& world,
746          const std::vector< Function<T,NDIM> >& v,
747          bool fence=true) {
748         PROFILE_BLOCK(Vcopy);
749         std::vector< Function<T,NDIM> > r(v.size());
750         for (unsigned int i=0; i<v.size(); ++i) {
751             r[i] = copy(v[i], false);
752         }
753         if (fence) world.gop.fence();
754         return r;
755     }
756 
757    /// Returns a vector of deep copies of of a function
758     template <typename T, std::size_t NDIM>
759     std::vector< Function<T,NDIM> >
760     copy(World& world,
761          const Function<T,NDIM>& v,
762          const unsigned int n,
763          bool fence=true) {
764         PROFILE_BLOCK(Vcopy1);
765         std::vector< Function<T,NDIM> > r(n);
766         for (unsigned int i=0; i<n; ++i) {
767             r[i] = copy(v, false);
768         }
769         if (fence) world.gop.fence();
770         return r;
771     }
772 
773     /// Returns new vector of functions --- q[i] = a[i] + b[i]
774     template <typename T, typename R, std::size_t NDIM>
775     std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> >
776     add(World& world,
777         const std::vector< Function<T,NDIM> >& a,
778         const std::vector< Function<R,NDIM> >& b,
779         bool fence=true) {
780         PROFILE_BLOCK(Vadd);
781         MADNESS_ASSERT(a.size() == b.size());
782         compress(world, a);
783         compress(world, b);
784 
785         std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > r(a.size());
786         for (unsigned int i=0; i<a.size(); ++i) {
787             r[i] = add(a[i], b[i], false);
788         }
789         if (fence) world.gop.fence();
790         return r;
791     }
792 
793      /// Returns new vector of functions --- q[i] = a + b[i]
794     template <typename T, typename R, std::size_t NDIM>
795     std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> >
796     add(World& world,
797         const Function<T,NDIM> & a,
798         const std::vector< Function<R,NDIM> >& b,
799         bool fence=true) {
800         PROFILE_BLOCK(Vadd1);
801         a.compress();
802         compress(world, b);
803 
804         std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > r(b.size());
805         for (unsigned int i=0; i<b.size(); ++i) {
806             r[i] = add(a, b[i], false);
807         }
808         if (fence) world.gop.fence();
809         return r;
810     }
811     template <typename T, typename R, std::size_t NDIM>
812     inline std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> >
813     add(World& world,
814         const std::vector< Function<R,NDIM> >& b,
815         const Function<T,NDIM> & a,
816         bool fence=true) {
817         return add(world, a, b, fence);
818     }
819 
820     /// Returns new vector of functions --- q[i] = a[i] - b[i]
821     template <typename T, typename R, std::size_t NDIM>
822     std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> >
823     sub(World& world,
824         const std::vector< Function<T,NDIM> >& a,
825         const std::vector< Function<R,NDIM> >& b,
826         bool fence=true) {
827         PROFILE_BLOCK(Vsub);
828         MADNESS_ASSERT(a.size() == b.size());
829         compress(world, a);
830         compress(world, b);
831 
832         std::vector< Function<TENSOR_RESULT_TYPE(T,R),NDIM> > r(a.size());
833         for (unsigned int i=0; i<a.size(); ++i) {
834             r[i] = sub(a[i], b[i], false);
835         }
836         if (fence) world.gop.fence();
837         return r;
838     }
839 
840     /// Returns new function --- q = sum_i f[i]
841     template <typename T, std::size_t NDIM>
842     Function<T, NDIM> sum(World& world, const std::vector<Function<T,NDIM> >& f,
843         bool fence=true) {
844 
845         compress(world, f);
846         Function<T,NDIM> r=real_factory_3d(world).compressed();
847 
848         for (unsigned int i=0; i<f.size(); ++i) r.gaxpy(1.0,f[i],1.0,false);
849         if (fence) world.gop.fence();
850         return r;
851     }
852 
853 
854     /// Multiplies and sums two vectors of functions r = \sum_i a[i] * b[i]
855     template <typename T, typename R, std::size_t NDIM>
856     Function<TENSOR_RESULT_TYPE(T,R), NDIM>
857     dot(World& world,
858         const std::vector< Function<T,NDIM> >& a,
859         const std::vector< Function<R,NDIM> >& b,
860         bool fence=true) {
861         return sum(world,mul(world,a,b,true),fence);
862     }
863 
864 
865 
866     /// Generalized A*X+Y for vectors of functions ---- a[i] = alpha*a[i] + beta*b[i]
867     template <typename T, typename Q, typename R, std::size_t NDIM>
868     void gaxpy(World& world,
869                Q alpha,
870                std::vector< Function<T,NDIM> >& a,
871                Q beta,
872                const std::vector< Function<R,NDIM> >& b,
873                bool fence=true) {
874         PROFILE_BLOCK(Vgaxpy);
875         MADNESS_ASSERT(a.size() == b.size());
876         compress(world, a);
877         compress(world, b);
878 
879         for (unsigned int i=0; i<a.size(); ++i) {
880             a[i].gaxpy(alpha, b[i], beta, false);
881         }
882         if (fence) world.gop.fence();
883     }
884 
885 
886     /// Applies a vector of operators to a vector of functions --- q[i] = apply(op[i],f[i])
887     template <typename opT, typename R, std::size_t NDIM>
888     std::vector< Function<TENSOR_RESULT_TYPE(typename opT::opT,R), NDIM> >
apply(World & world,const std::vector<std::shared_ptr<opT>> & op,const std::vector<Function<R,NDIM>> f)889     apply(World& world,
890           const std::vector< std::shared_ptr<opT> >& op,
891           const std::vector< Function<R,NDIM> > f) {
892 
893         PROFILE_BLOCK(Vapplyv);
894         MADNESS_ASSERT(f.size()==op.size());
895 
896         std::vector< Function<R,NDIM> >& ncf = *const_cast< std::vector< Function<R,NDIM> >* >(&f);
897 
898         reconstruct(world, f);
899         nonstandard(world, ncf);
900 
901         std::vector< Function<TENSOR_RESULT_TYPE(typename opT::opT,R), NDIM> > result(f.size());
902         for (unsigned int i=0; i<f.size(); ++i) {
903             MADNESS_ASSERT(not op[i]->is_slaterf12);
904             result[i] = apply_only(*op[i], f[i], false);
905         }
906 
907         world.gop.fence();
908 
909         standard(world, ncf, false);  // restores promise of logical constness
910         world.gop.fence();
911         reconstruct(world, result);
912 
913         return result;
914     }
915 
916 
917     /// Applies an operator to a vector of functions --- q[i] = apply(op,f[i])
918     template <typename T, typename R, std::size_t NDIM>
919     std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> >
apply(World & world,const SeparatedConvolution<T,NDIM> & op,const std::vector<Function<R,NDIM>> f)920     apply(World& world,
921           const SeparatedConvolution<T,NDIM>& op,
922           const std::vector< Function<R,NDIM> > f) {
923         PROFILE_BLOCK(Vapply);
924 
925         std::vector< Function<R,NDIM> >& ncf = *const_cast< std::vector< Function<R,NDIM> >* >(&f);
926 
927         reconstruct(world, f);
928         nonstandard(world, ncf);
929 
930         std::vector< Function<TENSOR_RESULT_TYPE(T,R), NDIM> > result(f.size());
931         for (unsigned int i=0; i<f.size(); ++i) {
932             result[i] = apply_only(op, f[i], false);
933         }
934 
935         world.gop.fence();
936 
937         standard(world, ncf, false);  // restores promise of logical constness
938         reconstruct(world, result);
939 
940         if (op.is_slaterf12) {
941         	MADNESS_ASSERT(not op.destructive());
942             for (unsigned int i=0; i<f.size(); ++i) {
943             	double trace=f[i].trace();
944                 result[i]=(result[i]-trace).scale(-0.5/op.mu());
945             }
946         }
947 
948         return result;
949     }
950 
951     /// Normalizes a vector of functions --- v[i] = v[i].scale(1.0/v[i].norm2())
952     template <typename T, std::size_t NDIM>
953     void normalize(World& world, std::vector< Function<T,NDIM> >& v, bool fence=true) {
954         PROFILE_BLOCK(Vnormalize);
955         std::vector<double> nn = norm2s(world, v);
956         for (unsigned int i=0; i<v.size(); ++i) v[i].scale(1.0/nn[i],false);
957         if (fence) world.gop.fence();
958     }
959 
960     template <typename T, std::size_t NDIM>
961     void print_size(World &world, const std::vector<Function<T,NDIM> > &v, const std::string &msg = "vectorfunction" ){
962     	if(v.empty()){
963     		if(world.rank()==0) std::cout << "print_size: " << msg << " is empty" << std::endl;
964     	}else if(v.size()==1){
965     		v.front().print_size(msg);
966     	}else{
967     		for(auto x:v){
968     			x.print_size(msg);
969     		}
970     	}
971     }
972 
973     // gives back the size in GB
974     template <typename T, std::size_t NDIM>
get_size(World & world,const std::vector<Function<T,NDIM>> & v)975     double get_size(World& world, const std::vector< Function<T,NDIM> >& v){
976 
977     	if (v.empty()) return 0.0;
978 
979     	const double d=sizeof(T);
980         const double fac=1024*1024*1024;
981 
982         double size=0.0;
983         for(unsigned int i=0;i<v.size();i++){
984             if (v[i].is_initialized()) size+=v[i].size();
985         }
986 
987         return size/fac*d;
988 
989     }
990 
991     // gives back the size in GB
992     template <typename T, std::size_t NDIM>
get_size(const Function<T,NDIM> & f)993     double get_size(const Function<T,NDIM> & f){
994     	const double d=sizeof(T);
995         const double fac=1024*1024*1024;
996         double size=f.size();
997         return size/fac*d;
998     }
999 
1000     /// apply op on the input vector yielding an output vector of functions
1001 
1002     /// @param[in]  op   the operator working on vin
1003     /// @param[in]  vin  vector of input Functions; needs to be refined to common level!
1004     /// @return vector of output Functions vout = op(vin)
1005     template <typename T, typename opT, std::size_t NDIM>
1006     std::vector<Function<T,NDIM> > multi_to_multi_op_values(const opT& op,
1007             const std::vector< Function<T,NDIM> >& vin,
1008             const bool fence=true) {
1009         MADNESS_ASSERT(vin.size()>0);
1010         MADNESS_ASSERT(vin[0].is_initialized()); // might be changed
1011         World& world=vin[0].world();
1012         Function<T,NDIM> dummy;
1013         dummy.set_impl(vin[0], false);
1014         std::vector<Function<T,NDIM> > vout=zero_functions<T,NDIM>(world, op.get_result_size());
1015         for (auto& out : vout) out.set_impl(vin[0],false);
1016         dummy.multi_to_multi_op_values(op, vin, vout, fence);
1017         return vout;
1018     }
1019 
1020 
1021 
1022 
1023     // convenience operators
1024 
1025     template <typename T, std::size_t NDIM>
1026     std::vector<Function<T,NDIM> > operator+(const std::vector<Function<T,NDIM> >& lhs,
1027             const std::vector<Function<T,NDIM>>& rhs) {
1028         return add(lhs[0].world(),lhs,rhs);
1029     }
1030 
1031     template <typename T, std::size_t NDIM>
1032     std::vector<Function<T,NDIM> > operator-(const std::vector<Function<T,NDIM> >& lhs,
1033             const std::vector<Function<T,NDIM> >& rhs) {
1034         return sub(lhs[0].world(),lhs,rhs);
1035     }
1036 
1037     template <typename T, std::size_t NDIM>
1038     std::vector<Function<T,NDIM> > operator*(const double fac,
1039             const std::vector<Function<T,NDIM> >& rhs) {
1040         std::vector<Function<T,NDIM> > tmp=copy(rhs[0].world(),rhs);
1041         scale(tmp[0].world(),tmp,fac);
1042         return tmp;
1043     }
1044 
1045     template <typename T, std::size_t NDIM>
1046     std::vector<Function<T,NDIM> > operator*(const std::vector<Function<T,NDIM> >& rhs,
1047             const double fac) {
1048         std::vector<Function<T,NDIM> > tmp=copy(rhs[0].world(),rhs);
1049         scale(tmp[0].world(),tmp,fac);
1050         return tmp;
1051     }
1052 
1053     /// multiply a vector of functions with a function: r[i] = v[i] * a
1054     template <typename T, std::size_t NDIM>
1055     std::vector<Function<T,NDIM> > operator*(const Function<T,NDIM>& a,
1056             const std::vector<Function<T,NDIM> >& v) {
1057         return mul(v[0].world(),a,v,true);
1058     }
1059 
1060 
1061     /// multiply a vector of functions with a function: r[i] = a * v[i]
1062     template <typename T, std::size_t NDIM>
1063     std::vector<Function<T,NDIM> > operator*(const std::vector<Function<T,NDIM> >& v,
1064             const Function<T,NDIM>& a) {
1065         return mul(v[0].world(),a,v,true);
1066     }
1067 
1068 
1069     template <typename T, std::size_t NDIM>
1070     std::vector<Function<T,NDIM> > operator+=(std::vector<Function<T,NDIM> >& rhs,
1071             const std::vector<Function<T,NDIM> >& lhs) {
1072         rhs=add(rhs[0].world(),rhs,lhs);
1073         return rhs;
1074     }
1075 
1076     template <typename T, std::size_t NDIM>
1077     std::vector<Function<T,NDIM> > operator-=(std::vector<Function<T,NDIM> >& rhs,
1078             const std::vector<Function<T,NDIM> >& lhs) {
1079         rhs=sub(rhs[0].world(),rhs,lhs);
1080         return rhs;
1081     }
1082 
1083     /// shorthand gradient operator
1084 
1085     /// returns the differentiated function f in all NDIM directions
1086     /// @param[in]  f       the function on which the grad operator works on
1087     /// @param[in]  refine  refinement before diff'ing makes the result more accurate
1088     /// @param[in]  fence   fence after completion; if reconstruction is needed always fence
1089     /// @return     the vector \frac{\partial}{\partial x_i} f
1090     template <typename T, std::size_t NDIM>
1091     std::vector<Function<T,NDIM> > grad(const Function<T,NDIM>& f,
1092             bool refine=false, bool fence=true) {
1093 
1094         World& world=f.world();
1095         f.reconstruct();
1096         if (refine) f.refine();      // refine to make result more precise
1097 
1098         std::vector< std::shared_ptr< Derivative<T,NDIM> > > grad=
1099                 gradient_operator<T,NDIM>(world);
1100 
1101         std::vector<Function<T,NDIM> > result(NDIM);
1102         for (int i=0; i<NDIM; ++i) result[i]=apply(*(grad[i]),f,false);
1103         if (fence) world.gop.fence();
1104         return result;
1105     }
1106 
1107     /// shorthand div operator
1108 
1109     /// returns the dot product of nabla with a vector f
1110     /// @param[in]  f       the vector of functions on which the div operator works on
1111     /// @param[in]  refine  refinement before diff'ing makes the result more accurate
1112     /// @param[in]  fence   fence after completion; currently always fences
1113     /// @return     the vector \frac{\partial}{\partial x_i} f
1114     /// TODO: add this to operator fusion
1115     template <typename T, std::size_t NDIM>
1116     Function<T,NDIM> div(const std::vector<Function<T,NDIM> >& v,
1117             bool do_refine=false, bool fence=true) {
1118 
1119         MADNESS_ASSERT(v.size()>0);
1120         World& world=v[0].world();
1121         reconstruct(world,v);
1122         if (do_refine) refine(world,v);      // refine to make result more precise
1123 
1124         std::vector< std::shared_ptr< Derivative<T,NDIM> > > grad=
1125                 gradient_operator<T,NDIM>(world);
1126 
1127         std::vector<Function<T,NDIM> > result(NDIM);
1128         for (int i=0; i<NDIM; ++i) result[i]=apply(*(grad[i]),v[i],false);
1129         world.gop.fence();
1130         return sum(world,result,fence);
1131     }
1132 
1133     /// load a vector of functions
1134     template<typename T, size_t NDIM>
load_function(World & world,std::vector<Function<T,NDIM>> & f,const std::string name)1135     void load_function(World& world, std::vector<Function<T,NDIM> >& f,
1136             const std::string name) {
1137         if (world.rank()==0) print("loading vector of functions",name);
1138         archive::ParallelInputArchive ar(world, name.c_str(), 1);
1139         std::size_t fsize=0;
1140         ar & fsize;
1141         f.resize(fsize);
1142         for (std::size_t i=0; i<fsize; ++i) ar & f[i];
1143     }
1144 
1145     /// save a vector of functions
1146     template<typename T, size_t NDIM>
save_function(const std::vector<Function<T,NDIM>> & f,const std::string name)1147     void save_function(const std::vector<Function<T,NDIM> >& f, const std::string name) {
1148         if (f.size()>0) {
1149             World& world=f.front().world();
1150             if (world.rank()==0) print("saving vector of functions",name);
1151             archive::ParallelOutputArchive ar(world, name.c_str(), 1);
1152             std::size_t fsize=f.size();
1153             ar & fsize;
1154             for (std::size_t i=0; i<fsize; ++i) ar & f[i];
1155         }
1156     }
1157 
1158 
1159 }
1160 #endif // MADNESS_MRA_VMRA_H__INCLUDED
1161