/*
  This file is part of MADNESS.

  Copyright (C) 2007,2010 Oak Ridge National Laboratory

  This program is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 2 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

  For more information please contact:

  Robert J. Harrison
  Oak Ridge National Laboratory
  One Bethel Valley Road
  P.O. Box 2008, MS-6367

  email: harrisonrj@ornl.gov
  tel:   865-241-3937
  fax:   865-572-0680
*/

#ifndef MADNESS_MRA_FUNCIMPL_H__INCLUDED
#define MADNESS_MRA_FUNCIMPL_H__INCLUDED

/// \file funcimpl.h
/// \brief Provides FunctionCommonData, FunctionImpl and FunctionFactory

#include <iostream>
#include <type_traits>
#include <madness/world/MADworld.h>
#include <madness/world/print.h>
#include <madness/misc/misc.h>
#include <madness/tensor/tensor.h>
#include <madness/tensor/gentensor.h>

#include <madness/mra/function_common_data.h>
#include <madness/mra/indexit.h>
#include <madness/mra/key.h>
#include <madness/mra/funcdefaults.h>
#include <madness/mra/function_factory.h>

#include "leafop.h"

namespace madness {
    template <typename T, std::size_t NDIM>
    class DerivativeBase;

    template<typename T, std::size_t NDIM>
    class FunctionImpl;

    template<typename T, std::size_t NDIM>
    class FunctionNode;

    template<typename T, std::size_t NDIM>
    class Function;

    template<typename T, std::size_t NDIM>
    class FunctionFactory;

    template<typename T, std::size_t NDIM, std::size_t MDIM>
    class CompositeFunctorInterface;

    template<int D>
    class LoadBalImpl;

}

namespace madness {


    /// A simple process map
    template<typename keyT>
    class SimplePmap : public WorldDCPmapInterface<keyT> {
    private:
        const int nproc;
        const ProcessID me;

    public:
        SimplePmap(World& world) : nproc(world.nproc()), me(world.rank())
        { }

        ProcessID owner(const keyT& key) const {
            if (key.level() == 0)
                return 0;
            else
                return key.hash() % nproc;
        }
    };

    /// A pmap that co-locates children on even levels with their odd-level parents
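    ///
    /// For levels above 3, a key on an even level is hashed with its (odd-level)
    /// parent's hash, so e.g. a level-6 box lands on the same process as its
    /// level-5 parent; odd-level keys (and all keys at level <= 3) hash themselves.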
    template <typename keyT>
    class LevelPmap : public WorldDCPmapInterface<keyT> {
    private:
        const int nproc;
    public:
        LevelPmap() : nproc(0) {};

        LevelPmap(World& world) : nproc(world.nproc()) {}

        /// Find the owner of a given key
        ProcessID owner(const keyT& key) const {
            Level n = key.level();
            if (n == 0) return 0;
            hashT hash;
            if (n <= 3 || (n&0x1)) hash = key.hash();
            else hash = key.parent().hash();
            return hash%nproc;
        }
    };


    /// FunctionNode holds the coefficients, etc., at each node of the 2^NDIM-tree
    template<typename T, std::size_t NDIM>
    class FunctionNode {
    public:
        typedef GenTensor<T> coeffT;
        typedef Tensor<T> tensorT;
    private:
        // Should compile OK with these volatile but there should
        // be no need to set as volatile since the container internally
        // stores the entire entry as volatile

        coeffT _coeffs; ///< The coefficients, if any
        double _norm_tree; ///< After norm_tree() this holds the norm of the coefficients summed up the tree
        bool _has_children; ///< True if there are children
        coeffT buffer; ///< Accumulation buffer used by accumulate()/consolidate_buffer()

    public:
        typedef WorldContainer<Key<NDIM> , FunctionNode<T, NDIM> > dcT; ///< Type of container holding the nodes
        /// Default constructor makes node without coeff or children
        FunctionNode() :
            _coeffs(), _norm_tree(1e300), _has_children(false) {
        }

        /// Constructor from given coefficients with optional children

        /// Note that only a shallow copy of the coeff is taken, so
        /// you should pass in a deep copy if you want the node to
        /// take ownership.
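        ///
        /// A minimal sketch (assuming \c c is an existing \c coeffT): passing a
        /// deep copy gives the node its own data,
        /// \code
        /// FunctionNode<T,NDIM> owning_node(copy(c), false);
        /// \endcode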
        explicit
        FunctionNode(const coeffT& coeff, bool has_children = false) :
            _coeffs(coeff), _norm_tree(1e300), _has_children(has_children) {
        }

        explicit
        FunctionNode(const coeffT& coeff, double norm_tree, bool has_children) :
            _coeffs(coeff), _norm_tree(norm_tree), _has_children(has_children) {
        }

        FunctionNode(const FunctionNode<T, NDIM>& other) {
            *this = other;
        }

        FunctionNode<T, NDIM>&
        operator=(const FunctionNode<T, NDIM>& other) {
            if (this != &other) {
                coeff() = copy(other.coeff());
                _norm_tree = other._norm_tree;
                _has_children = other._has_children;
            }
            return *this;
        }

        /// Copy with possible type conversion of coefficients, copying all other state

        /// Choose to not overload copy and type conversion operators
        /// so there are no automatic type conversions.
        template<typename Q>
        FunctionNode<Q, NDIM>
        convert() const {
            return FunctionNode<Q, NDIM> (madness::convert<Q,T>(coeff()), _has_children);
        }

        /// Returns true if there are coefficients in this node
        bool
        has_coeff() const {
            return _coeffs.has_data();
        }


        /// Returns true if this node has children
        bool
        has_children() const {
            return _has_children;
        }

        /// Returns true if this does not have children
        bool
        is_leaf() const {
            return !_has_children;
        }

        /// Returns true if this node is invalid (no coeffs and no children)
        bool
        is_invalid() const {
            return !(has_coeff() || has_children());
        }

        /// Returns a non-const reference to the tensor containing the coeffs

        /// Returns an empty tensor if there are no coefficients.
        coeffT&
        coeff() {
            MADNESS_ASSERT(_coeffs.ndim() == -1 || (_coeffs.dim(0) <= 2
                                                    * MAXK && _coeffs.dim(0) >= 0));
            return const_cast<coeffT&>(_coeffs);
        }

        /// Returns a const reference to the tensor containing the coeffs

        /// Returns an empty tensor if there are no coefficients.
        const coeffT&
        coeff() const {
            return const_cast<const coeffT&>(_coeffs);
        }

        /// Returns the number of coefficients in this node
        size_t size() const {
            return _coeffs.size();
        }

    public:

        /// reduces the rank of the coefficients (if applicable)
        void reduceRank(const double& eps) {
            _coeffs.reduce_rank(eps);
        }

        /// Sets \c has_children attribute to value of \c flag.
        void set_has_children(bool flag) {
            _has_children = flag;
        }

        /// Sets \c has_children attribute to true, recursing up the tree to ensure the node is connected to its parent
        void set_has_children_recursive(const typename FunctionNode<T,NDIM>::dcT& c,const Key<NDIM>& key) {
            //madness::print("   set_chi_recu: ", key, *this);
            //PROFILE_MEMBER_FUNC(FunctionNode); // Too fine grain for routine profiling
            if (!(has_children() || has_coeff() || key.level()==0)) {
                // If node already knows it has children or it has
                // coefficients then it must already be connected to
                // its parent.  If not, the node was probably just
                // created for this operation and must be connected to
                // its parent.
                Key<NDIM> parent = key.parent();
                // Task on next line used to be TaskAttributes::hipri()) ... but deferring execution of this
                // makes sense since it is not urgent and lazy connection will likely mean that less forwarding
                // will happen since the upper level task will have already made the connection.
                const_cast<dcT&>(c).task(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
                //const_cast<dcT&>(c).send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
                //madness::print("   set_chi_recu: forwarding",key,parent);
            }
            _has_children = true;
        }

        /// Sets \c has_children attribute to value of \c !flag
        void set_is_leaf(bool flag) {
            _has_children = !flag;
        }

        /// Takes a \em shallow copy of the coeff --- same as \c this->coeff()=coeff
        void set_coeff(const coeffT& coeffs) {
            coeff() = coeffs;
            if ((_coeffs.has_data()) and ((_coeffs.dim(0) < 0) || (_coeffs.dim(0)>2*MAXK))) {
                print("set_coeff: may have a problem");
                print("set_coeff: coeff.dim[0] =", coeffs.dim(0), ", 2* MAXK =", 2*MAXK);
            }
            MADNESS_ASSERT(coeffs.dim(0)<=2*MAXK && coeffs.dim(0)>=0);
        }

        /// Clears the coefficients (has_coeff() will subsequently return false)
        void clear_coeff() {
            coeff()=coeffT();
        }

        /// Scale the coefficients of this node
        template <typename Q>
        void scale(Q a) {
            _coeffs.scale(a);
        }

        /// Sets the value of norm_tree
        void set_norm_tree(double norm_tree) {
            _norm_tree = norm_tree;
        }

        /// Gets the value of norm_tree
        double get_norm_tree() const {
            return _norm_tree;
        }


        /// General bi-linear operation --- this = this*alpha + other*beta

        /// This/other may not have coefficients.  \c has_children will be
        /// true in the result if either this/other have children.
        template <typename Q, typename R>
        void gaxpy_inplace(const T& alpha, const FunctionNode<Q,NDIM>& other, const R& beta) {
            //PROFILE_MEMBER_FUNC(FuncNode);  // Too fine grain for routine profiling
            if (other.has_children())
                _has_children = true;
            if (has_coeff()) {
                if (other.has_coeff()) {
                    coeff().gaxpy(alpha,other.coeff(),beta);
                }
                else {
                    coeff().scale(alpha);
                }
            }
            else if (other.has_coeff()) {
                coeff() = other.coeff()*beta; //? Is this the correct type conversion?
            }
        }

        /// Accumulate inplace and if necessary connect node to parent
        double accumulate2(const tensorT& t, const typename FunctionNode<T,NDIM>::dcT& c,
                           const Key<NDIM>& key) {
            double cpu0=cpu_time();
            if (has_coeff()) {
                MADNESS_ASSERT(coeff().tensor_type()==TT_FULL);
                //            	if (coeff().type==TT_FULL) {
                coeff() += coeffT(t,-1.0,TT_FULL);
                //            	} else {
                //            		tensorT cc=coeff().full_tensor_copy();;
                //            		cc += t;
                //            		coeff()=coeffT(cc,args);
                //            	}
            }
            else {
                // No coeff and no children means the node is newly
                // created for this operation and therefore we must
                // tell its parent that it exists.
                coeff() = coeffT(t,-1.0,TT_FULL);
                //                coeff() = copy(t);
                //                coeff() = coeffT(t,args);
                if ((!_has_children) && key.level()> 0) {
                    Key<NDIM> parent = key.parent();
                    if (c.is_local(parent))
                        const_cast<dcT&>(c).send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
                    else
                        const_cast<dcT&>(c).task(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
                }
            }
            double cpu1=cpu_time();
            return cpu1-cpu0;
        }


        /// Accumulate inplace and if necessary connect node to parent
        double accumulate(const coeffT& t, const typename FunctionNode<T,NDIM>::dcT& c,
                          const Key<NDIM>& key, const TensorArgs& args) {
            double cpu0=cpu_time();
            if (has_coeff()) {

#if 1
                coeff().add_SVD(t,args.thresh);
                if (buffer.rank()<coeff().rank()) {
                    if (buffer.has_data()) {
                        buffer.add_SVD(coeff(),args.thresh);
                    } else {
                        buffer=copy(coeff());
                    }
                    coeff()=coeffT();
                }

#else
                // always do low rank
                coeff().add_SVD(t,args.thresh);

#endif

            } else {
                // No coeff and no children means the node is newly
                // created for this operation and therefore we must
                // tell its parent that it exists.
                coeff() = copy(t);
                if ((!_has_children) && key.level()> 0) {
                    Key<NDIM> parent = key.parent();
                    if (c.is_local(parent))
                        const_cast<dcT&>(c).send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
                    else
                        const_cast<dcT&>(c).task(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent);
                }
            }
            double cpu1=cpu_time();
            return cpu1-cpu0;
        }

        void consolidate_buffer(const TensorArgs& args) {
            if ((coeff().has_data()) and (buffer.has_data())) {
                coeff().add_SVD(buffer,args.thresh);
            } else if (buffer.has_data()) {
                coeff()=buffer;
            }
            buffer=coeffT();
        }

        T trace_conj(const FunctionNode<T,NDIM>& rhs) const {
            return this->_coeffs.trace_conj((rhs._coeffs));
        }

        template <typename Archive>
        void serialize(Archive& ar) {
            ar & coeff() & _has_children & _norm_tree;
        }

    };

    template <typename T, std::size_t NDIM>
    std::ostream& operator<<(std::ostream& s, const FunctionNode<T,NDIM>& node) {
        s << "(has_coeff=" << node.has_coeff() << ", has_children=" << node.has_children() << ", norm=";
        double norm = node.has_coeff() ? node.coeff().normf() : 0.0;
        if (norm < 1e-12)
            norm = 0.0;
        double nt = node.get_norm_tree();
        if (nt == 1e300) nt = 0.0;
        s << norm << ", norm_tree=" << nt << ", rank="<< node.coeff().rank()<<")";
        return s;
    }


    /// returns true if the result of a hartree_product is a leaf node (compute norm & error)
    template<typename T, size_t NDIM>
    struct hartree_leaf_op {

        typedef FunctionImpl<T,NDIM> implT;
        const FunctionImpl<T,NDIM>* f;
        long k;
        bool do_error_leaf_op() const {return false;}

        hartree_leaf_op() {}
        hartree_leaf_op(const implT* f, const long& k) : f(f), k(k) {}

        /// no pre-determination
        bool operator()(const Key<NDIM>& key) const {return false;}

        /// no post-determination
        bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const {
            MADNESS_EXCEPTION("no post-determination in hartree_leaf_op",1);
            return true;
        }

        /// post-determination: true if f is a leaf and the result is well-represented

        /// @param[in]  key the hi-dimensional key (breaks into keys for f and g)
        /// @param[in]  fcoeff coefficients of f of its appropriate key in NS form
        /// @param[in]  gcoeff coefficients of g of its appropriate key in NS form
        bool operator()(const Key<NDIM>& key, const Tensor<T>& fcoeff, const Tensor<T>& gcoeff) const {

            if (key.level()<2) return false;
            Slice s = Slice(0,k-1);
            std::vector<Slice> s0(NDIM/2,s);

            const double tol=f->get_thresh();
            const double thresh=f->truncate_tol(tol, key);
            // include the wavelets in the norm, makes it much more accurate
            const double fnorm=fcoeff.normf();
            const double gnorm=gcoeff.normf();

            // if the final norm is small, perform the hartree product and return
            const double norm=fnorm*gnorm;  // computing the outer product
            if (norm < thresh) return true;

            // norm of the scaling function coefficients
            const double sfnorm=fcoeff(s0).normf();
            const double sgnorm=gcoeff(s0).normf();

            // get the error of both functions and of the pair function;
            // need the abs for numerics: sfnorm might be equal to fnorm.
            const double ferror=sqrt(std::abs(fnorm*fnorm-sfnorm*sfnorm));
            const double gerror=sqrt(std::abs(gnorm*gnorm-sgnorm*sgnorm));

            // if the expected error is small, perform the hartree product and return
            const double error=fnorm*gerror + ferror*gnorm + ferror*gerror;
            //            const double error=sqrt(fnorm*fnorm*gnorm*gnorm - sfnorm*sfnorm*sgnorm*sgnorm);
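            // Sketch of why this bounds the error (with sf, sg the scaling-function
            // parts of f and g): f*g - sf*sg = (f-sf)*sg + sf*(g-sg) + (f-sf)*(g-sg),
            // so ||f*g - sf*sg|| <= ferror*sgnorm + sfnorm*gerror + ferror*gerror,
            // which is itself bounded by the expression used here because
            // sfnorm <= fnorm and sgnorm <= gnorm.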

            if (error < thresh) return true;
            return false;
        }
        template <typename Archive> void serialize (Archive& ar) {
            ar & f & k;
        }
    };

    /// returns true if the result of the convolution operator op with some provided
    /// coefficients will be small
    template<typename T, size_t NDIM, typename opT>
    struct op_leaf_op {
        typedef FunctionImpl<T,NDIM> implT;

        const opT* op;    ///< the convolution operator
        const implT* f;   ///< the source or result function, needed for truncate_tol
        bool do_error_leaf_op() const {return true;}

        op_leaf_op() {}
        op_leaf_op(const opT* op, const implT* f) : op(op), f(f) {}

        /// pre-determination: we can't know if this will be a leaf node before we have the final coeffs
        bool operator()(const Key<NDIM>& key) const {return true;}

        /// post-determination: return true if operator and coefficient norms are small
        bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const {
            if (key.level()<2) return false;
            const double cnorm=coeff.normf();
            return this->operator()(key,cnorm);
        }

        /// post-determination: return true if operator and coefficient norms are small
        bool operator()(const Key<NDIM>& key, const double& cnorm) const {
            if (key.level()<2) return false;

            typedef Key<opT::opdim> opkeyT;
            const opkeyT source=op->get_source_key(key);

            const double thresh=f->truncate_tol(f->get_thresh(),key);
            const std::vector<opkeyT>& disp = op->get_disp(key.level());
            const opkeyT& d = *disp.begin();         // use the zero-displacement for screening
            const double opnorm = op->norm(key.level(), d, source);
            const double norm=opnorm*cnorm;
            return norm<thresh;

        }

        template <typename Archive> void serialize (Archive& ar) {
            ar & op & f;
        }

    };


    /// returns true if the result of a hartree_product is a leaf node
    /// criteria are error, norm and its effect on a convolution operator
    template<typename T, size_t NDIM, size_t LDIM, typename opT>
    struct hartree_convolute_leaf_op {

        typedef FunctionImpl<T,NDIM> implT;
        typedef FunctionImpl<T,LDIM> implL;

        const FunctionImpl<T,NDIM>* f;
        const implL* g;     // for use of its cdata only
        const opT* op;
        bool do_error_leaf_op() const {return false;}

        hartree_convolute_leaf_op() {}
        hartree_convolute_leaf_op(const implT* f, const implL* g, const opT* op)
            : f(f), g(g), op(op) {}

        /// no pre-determination
        bool operator()(const Key<NDIM>& key) const {return true;}

        /// no post-determination
        bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const {
            MADNESS_EXCEPTION("no post-determination in hartree_convolute_leaf_op",1);
            return true;
        }

        /// post-determination: true if f is a leaf and the result is well-represented

        /// @param[in]  key the hi-dimensional key (breaks into keys for f and g)
        /// @param[in]  fcoeff coefficients of f of its appropriate key in NS form
        /// @param[in]  gcoeff coefficients of g of its appropriate key in NS form
        bool operator()(const Key<NDIM>& key, const Tensor<T>& fcoeff, const Tensor<T>& gcoeff) const {
            //        bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const {

            if (key.level()<2) return false;

            const double tol=f->get_thresh();
            const double thresh=f->truncate_tol(tol, key);
            // include the wavelets in the norm, makes it much more accurate
            const double fnorm=fcoeff.normf();
            const double gnorm=gcoeff.normf();

            // norm of the scaling function coefficients
            const double sfnorm=fcoeff(g->get_cdata().s0).normf();
            const double sgnorm=gcoeff(g->get_cdata().s0).normf();

            // if the final norm is small, perform the hartree product and return
            const double norm=fnorm*gnorm;  // computing the outer product
            if (norm < thresh) return true;

            // get the error of both functions and of the pair function
            const double ferror=sqrt(fnorm*fnorm-sfnorm*sfnorm);
            const double gerror=sqrt(gnorm*gnorm-sgnorm*sgnorm);

            // if the expected error is small, perform the hartree product and return
            const double error=fnorm*gerror + ferror*gnorm + ferror*gerror;
            if (error < thresh) return true;

            // now check if the norm of this and the norm of the operator are significant
            const std::vector<Key<NDIM> >& disp = op->get_disp(key.level());
            const Key<NDIM>& d = *disp.begin();         // use the zero-displacement for screening
            const double opnorm = op->norm(key.level(), d, key);
            const double final_norm=opnorm*sfnorm*sgnorm;
            if (final_norm < thresh) return true;

            return false;
        }
        template <typename Archive> void serialize (Archive& ar) {
            ar & f & op;
        }
    };

    template<typename T, size_t NDIM>
    struct noop {
        void operator()(const Key<NDIM>& key, const GenTensor<T>& coeff, const bool& is_leaf) const {}
        bool operator()(const Key<NDIM>& key, const GenTensor<T>& fcoeff, const GenTensor<T>& gcoeff) const {
            MADNESS_EXCEPTION("in noop::operator()",1);
            return true;
        }
        template <typename Archive> void serialize (Archive& ar) {}

    };

    template<typename T, std::size_t NDIM>
    struct insert_op {
        typedef FunctionImpl<T,NDIM> implT;
        typedef Key<NDIM> keyT;
        typedef GenTensor<T> coeffT;
        typedef FunctionNode<T,NDIM> nodeT;

        implT* impl;
        insert_op() : impl() {}
        insert_op(implT* f) : impl(f) {}
        insert_op(const insert_op& other) : impl(other.impl) {}
        void operator()(const keyT& key, const coeffT& coeff, const bool& is_leaf) const {
            impl->get_coeffs().replace(key,nodeT(coeff,not is_leaf));
        }
        template <typename Archive> void serialize (Archive& ar) {
            ar & impl;
        }

    };

    template<size_t NDIM>
    struct true_op {

        template<typename T>
        bool operator()(const Key<NDIM>& key, const T& t) const {return true;}

        template<typename T, typename R>
        bool operator()(const Key<NDIM>& key, const T& t, const R& r) const {return true;}
        template <typename Archive> void serialize (Archive& ar) {}

    };

    /// shallow-copy, pared-down version of FunctionNode, for special purposes only
    template<typename T, std::size_t NDIM>
    struct ShallowNode {
        typedef GenTensor<T> coeffT;
        coeffT _coeffs;
        bool _has_children;
        ShallowNode() : _coeffs(), _has_children(false) {}
        ShallowNode(const FunctionNode<T,NDIM>& node)
            : _coeffs(node.coeff()), _has_children(node.has_children()) {}
        ShallowNode(const ShallowNode<T,NDIM>& node)
            : _coeffs(node.coeff()), _has_children(node._has_children) {}

        const coeffT& coeff() const {return _coeffs;}
        coeffT& coeff() {return _coeffs;}
        bool has_children() const {return _has_children;}
        bool is_leaf() const {return not _has_children;}
        template <typename Archive>
        void serialize(Archive& ar) {
            ar & coeff() & _has_children;
        }
    };


    /// a class to track where relevant (parent) coeffs are

    /// E.g. if a 6D function is composed of two 3D functions, their coefficients must be tracked.
    /// We might need coeffs from a box that does not exist, so to avoid repeatedly searching for
    /// parents we keep track of which existing box actually holds the required coeffs.
    ///  - CoeffTracker will refer either to a requested key, if it exists, or to its
    ///    outermost parent.
    ///  - Children must be made in sequential order to be able to track correctly.
    ///
    /// Usage: 	1. make the child of a given CoeffTracker.
    ///			   If the parent CoeffTracker refers to a leaf node (flag is_leaf)
    ///            the child will refer to the same node. Otherwise it will refer
    ///            to the child node.
    ///			2. retrieve its coefficients (possible communication; returns a Future).
    ///            Member variable key always refers to an existing node,
    ///            so we can fetch it. Once we have the node we can determine
    ///            if it has children, which allows us to make a child (see 1.)
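    ///
    /// A minimal sketch of this pattern (\c impl and \c childkey are placeholder
    /// names; in practice activate() is usually chained inside a task):
    /// \code
    /// CoeffTracker<T,NDIM> root(&impl);                         // refers to key0 of impl
    /// CoeffTracker<T,NDIM> child = root.make_child(childkey);
    /// Future<CoeffTracker<T,NDIM> > active = child.activate();  // fetches the node (or its parent)
    /// // later, inside a task that received 'active':
    /// // GenTensor<T> c = active.get().coeff(childkey);
    /// \endcode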
    template<typename T, size_t NDIM>
    class CoeffTracker {

        typedef FunctionImpl<T,NDIM> implT;
        typedef Key<NDIM> keyT;
        typedef GenTensor<T> coeffT;
        typedef std::pair<Key<NDIM>,ShallowNode<T,NDIM> > datumT;
        enum LeafStatus {no, yes, unknown};

        /// the funcimpl that has the coeffs
        const implT* impl;
        /// the current key, which must exist in impl
        keyT key_;
        /// flag if key is a leaf node
        LeafStatus is_leaf_;
        /// the coefficients belonging to key
        coeffT coeff_;
    public:

        /// default ctor
        CoeffTracker() : impl(), key_(), is_leaf_(unknown), coeff_() {}

        /// the initial ctor making the root key
        CoeffTracker(const implT* impl) : impl(impl), is_leaf_(no) {
            if (impl) key_=impl->get_cdata().key0;
        }

        /// ctor with a pair<keyT,nodeT>
        explicit CoeffTracker(const CoeffTracker& other, const datumT& datum) : impl(other.impl), key_(other.key_),
                                                                                coeff_(datum.second.coeff()) {
            if (datum.second.is_leaf()) is_leaf_=yes;
            else is_leaf_=no;
        }

        /// copy ctor
        CoeffTracker(const CoeffTracker& other) : impl(other.impl), key_(other.key_),
                                                  is_leaf_(other.is_leaf_), coeff_(other.coeff_) {}

        /// const reference to impl
        const implT* get_impl() const {return impl;}

        /// const reference to the coeffs
        const coeffT& coeff() const {return coeff_;}

        /// const reference to the key
        const keyT& key() const {return key_;}

        /// return the coefficients belonging to the passed-in key

        /// if key equals the tracked key just return the coeffs, otherwise
        /// make the child coefficients.
        /// @param[in]	key		return coeffs corresponding to this key
        /// @return 	coefficients belonging to key
        coeffT coeff(const keyT& key) const {
            MADNESS_ASSERT(impl);
            if (impl->is_compressed() or impl->is_nonstandard())
                return impl->parent_to_child_NS(key,key_,coeff_);
            return impl->parent_to_child(coeff_,key_,key);
        }

        /// const reference to is_leaf flag
        const LeafStatus& is_leaf() const {return is_leaf_;}

        /// make a child of this, ignoring the coeffs
        CoeffTracker make_child(const keyT& child) const {

            // fast return
            if ((not impl) or impl->is_on_demand()) return CoeffTracker(*this);

            // can't make a child without knowing if this is a leaf -- activate first
            MADNESS_ASSERT((is_leaf_==yes) or (is_leaf_==no));

            CoeffTracker result;
            if (impl) {
                result.impl=impl;
                if (is_leaf_==yes) result.key_=key_;
                if (is_leaf_==no) {
                    result.key_=child;
                    // check if child is a direct descendant of this, but root node is special case
                    if (child.level()>0) MADNESS_ASSERT(result.key().level()==key().level()+1);
                }
                result.is_leaf_=unknown;
            }
            return result;
        }

        /// find the coefficients

        /// this involves communication to a remote node
        /// @return	a Future<CoeffTracker> with the coefficients that key refers to
        Future<CoeffTracker> activate() const {

            // fast return
            if (not impl) return Future<CoeffTracker>(CoeffTracker());
            if (impl->is_on_demand()) return Future<CoeffTracker>(CoeffTracker(impl));

            // this will return a <keyT,nodeT> from a remote node
            ProcessID p=impl->get_coeffs().owner(key());
            Future<datumT> datum1=impl->task(p, &implT::find_datum,key_,TaskAttributes::hipri());

            // construct a new CoeffTracker locally
            return impl->world.taskq.add(*const_cast<CoeffTracker*> (this),
                                         &CoeffTracker::forward_ctor,*this,datum1);
        }

    private:
        /// taskq-compatible forwarding to the ctor
        CoeffTracker forward_ctor(const CoeffTracker& other, const datumT& datum) const {
            return CoeffTracker(other,datum);
        }

    public:
        /// serialization
        template <typename Archive> void serialize(const Archive& ar) {
            int il=int(is_leaf_);
            ar & impl & key_ & il & coeff_;
            is_leaf_=LeafStatus(il);
        }
    };

    template<typename T, std::size_t NDIM>
    std::ostream&
    operator<<(std::ostream& s, const CoeffTracker<T,NDIM>& ct) {
        s << ct.key() << ct.is_leaf() << " " << ct.get_impl();
        return s;
    }

    /// FunctionImpl holds all Function state to facilitate shallow copy semantics

    /// Since Function assignment and copy constructors are shallow it
    /// greatly simplifies maintaining consistent state to have all
    /// (permanent) state encapsulated in a single class.  The state
    /// is shared between instances using a shared_ptr<FunctionImpl>.
    ///
    /// The FunctionImpl inherits all of the functionality of WorldContainer
    /// (to store the coefficients) and WorldObject<WorldContainer> (used
    /// for RMI and for its unique id).
    ///
    /// The class methods are public to avoid painful multiple friend template
    /// declarations for Function and FunctionImpl ... but this trust should not be
    /// abused ... NOTHING except FunctionImpl methods should mess with FunctionImplData.
    /// The LB stuff might have to be an exception.
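    ///
    /// A minimal sketch of the shallow-copy semantics this enables (illustrative
    /// only; \c f is assumed to be some existing Function):
    /// \code
    /// Function<double,3> g = f;   // g and f now share one FunctionImpl
    /// g.scale(2.0);               // therefore also visible through f
    /// \endcode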
    template <typename T, std::size_t NDIM>
    class FunctionImpl : public WorldObject< FunctionImpl<T,NDIM> > {
    private:
        typedef WorldObject< FunctionImpl<T,NDIM> > woT; ///< Base class world object type
    public:
        typedef FunctionImpl<T,NDIM> implT; ///< Type of this class (implementation)
        typedef std::shared_ptr< FunctionImpl<T,NDIM> > pimplT; ///< pointer to this class
        typedef Tensor<T> tensorT; ///< Type of tensor for anything but to hold coeffs
        typedef Vector<Translation,NDIM> tranT; ///< Type of array holding translation
        typedef Key<NDIM> keyT; ///< Type of key
        typedef FunctionNode<T,NDIM> nodeT; ///< Type of node
        typedef GenTensor<T> coeffT; ///< Type of tensor used to hold coeffs
        typedef WorldContainer<keyT,nodeT> dcT; ///< Type of container holding the coefficients
        typedef std::pair<const keyT,nodeT> datumT; ///< Type of entry in container
        typedef Vector<double,NDIM> coordT; ///< Type of vector holding coordinates

        //template <typename Q, int D> friend class Function;
        template <typename Q, std::size_t D> friend class FunctionImpl;

        World& world;

        /// getters
        int get_initial_level()const{return initial_level;}
        int get_special_level()const{return special_level;}
        const std::vector<Vector<double,NDIM> >& get_special_points()const{return special_points;}

    private:
        int k; ///< Wavelet order
        double thresh; ///< Screening threshold
        int initial_level; ///< Initial level for refinement
        int special_level; ///< Minimum level for refinement on special points
        std::vector<Vector<double,NDIM> > special_points; ///< special points for further refinement (needed for composite functions or multiplication)
        int max_refine_level; ///< Do not refine below this level
        int truncate_mode; ///< 0=default=(|d|<thresh), 1=(|d|<thresh/2^n), 2=(|d|<thresh/4^n);
        bool autorefine; ///< If true, autorefine where appropriate
        bool truncate_on_project; ///< If true projection inserts at level n-1 not n
        bool nonstandard; ///< If true, compress keeps scaling coeff
        TensorArgs targs; ///< type of tensor to be used in the FunctionNodes

        const FunctionCommonData<T,NDIM>& cdata;

        std::shared_ptr< FunctionFunctorInterface<T,NDIM> > functor;

        bool on_demand; ///< does this function have an additional functor?
        bool compressed; ///< Compression status
        bool redundant; ///< If true, function keeps sum coefficients on all levels

        dcT coeffs; ///< The coefficients

        // Disable the default copy constructor
        FunctionImpl(const FunctionImpl<T,NDIM>& p);

    public:
        Timer timer_accumulate;
        Timer timer_lr_result;
        Timer timer_filter;
        Timer timer_compress_svd;
        Timer timer_target_driven;
        bool do_new;
        AtomicInt small;
        AtomicInt large;

        /// Initialize function impl from data in factory
        FunctionImpl(const FunctionFactory<T,NDIM>& factory)
            : WorldObject<implT>(factory._world)
            , world(factory._world)
            , k(factory._k)
            , thresh(factory._thresh)
            , initial_level(factory._initial_level)
            , special_level(factory._special_level)
            , special_points(factory._special_points)
            , max_refine_level(factory._max_refine_level)
            , truncate_mode(factory._truncate_mode)
            , autorefine(factory._autorefine)
            , truncate_on_project(factory._truncate_on_project)
            , nonstandard(false)
            , targs(factory._thresh,FunctionDefaults<NDIM>::get_tensor_type())
            , cdata(FunctionCommonData<T,NDIM>::get(k))
            , functor(factory.get_functor())
            , on_demand(factory._is_on_demand)
            , compressed(factory._compressed)
            , redundant(false)
            , coeffs(world,factory._pmap,false)
            //, bc(factory._bc)
        {
            // PROFILE_MEMBER_FUNC(FunctionImpl); // No need to profile this
            // !!! Ensure that all local state is correctly formed
            // before invoking process_pending for the coeffs and
            // for this.  Otherwise, there is a race condition.
            MADNESS_ASSERT(k>0 && k<=MAXK);

            bool empty = (factory._empty or is_on_demand());
            bool do_refine = factory._refine;

            if (do_refine)
                initial_level = std::max(0,initial_level - 1);

            if (empty) { // Do not set any coefficients at all
                // additional functors are only evaluated on-demand
            } else if (functor) { // Project function and optionally refine
                insert_zero_down_to_initial_level(cdata.key0);
                typename dcT::const_iterator end = coeffs.end();
                for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
                    if (it->second.is_leaf())
                        woT::task(coeffs.owner(it->first), &implT::project_refine_op, it->first, do_refine,
                                  functor->special_points());
                }
            }
            else { // Set as if a zero function
                initial_level = 1;
                insert_zero_down_to_initial_level(keyT(0));
            }

            coeffs.process_pending();
            this->process_pending();
            if (factory._fence && (functor || !empty)) world.gop.fence();
        }

        /// Copy constructor

        /// Allocates a \em new function in preparation for a deep copy
        ///
        /// By default takes pmap from other but can also specify a different pmap.
        /// Does \em not copy the coefficients ... creates an empty container.
        template <typename Q>
        FunctionImpl(const FunctionImpl<Q,NDIM>& other,
                     const std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >& pmap,
                     bool dozero)
            : WorldObject<implT>(other.world)
            , world(other.world)
            , k(other.k)
            , thresh(other.thresh)
            , initial_level(other.initial_level)
            , special_level(other.special_level)
            , special_points(other.special_points)
            , max_refine_level(other.max_refine_level)
            , truncate_mode(other.truncate_mode)
            , autorefine(other.autorefine)
            , truncate_on_project(other.truncate_on_project)
            , nonstandard(other.nonstandard)
            , targs(other.targs)
            , cdata(FunctionCommonData<T,NDIM>::get(k))
            , functor()
            , on_demand(false)    // since functor is default-constructed (empty)
            , compressed(other.compressed)
            , redundant(other.redundant)
            , coeffs(world, pmap ? pmap : other.coeffs.get_pmap())
            //, bc(other.bc)
        {
            if (dozero) {
                initial_level = 1;
                insert_zero_down_to_initial_level(cdata.key0);
                //world.gop.fence(); <<<<<<<<<<<<<<<<<<<<<<   needs a fence argument
            }
            coeffs.process_pending();
            this->process_pending();
        }

        virtual ~FunctionImpl() { }

        const std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >& get_pmap() const;

        /// Copy coeffs from other into self
        template <typename Q>
        void copy_coeffs(const FunctionImpl<Q,NDIM>& other, bool fence) {
            typename FunctionImpl<Q,NDIM>::dcT::const_iterator end = other.coeffs.end();
            for (typename FunctionImpl<Q,NDIM>::dcT::const_iterator it=other.coeffs.begin();
                 it!=end; ++it) {
                const keyT& key = it->first;
                const typename FunctionImpl<Q,NDIM>::nodeT& node = it->second;
                coeffs.replace(key,node. template convert<T>());
            }
            if (fence)
                world.gop.fence();
        }

        /// perform inplace gaxpy: this = alpha*this + beta*other
        /// @param[in]	alpha	prefactor for this
        /// @param[in]	beta	prefactor for other
        /// @param[in]	g       the other function, reconstructed
        template<typename Q, typename R>
        void gaxpy_inplace_reconstructed(const T& alpha, const FunctionImpl<Q,NDIM>& g, const R& beta, const bool fence) {
            // merge g's tree into this' tree
            this->merge_trees(beta,g,alpha,true);

            // sum down the sum coeffs into the leafs
            if (world.rank() == coeffs.owner(cdata.key0)) sum_down_spawn(cdata.key0, coeffT());
            if (fence) world.gop.fence();
        }

        /// merge the trees of this and other, while multiplying them with the alpha or beta, resp

        /// first step in an inplace gaxpy operation for reconstructed functions; assuming the same
        /// distribution for this and other

        /// on output, *this = alpha* *this + beta * other
        /// @param[in]	alpha	prefactor for this
        /// @param[in]	beta	prefactor for other
        /// @param[in]	other	the other function, reconstructed
        template<typename Q, typename R>
        void merge_trees(const T alpha, const FunctionImpl<Q,NDIM>& other, const R beta, const bool fence=true) {
            MADNESS_ASSERT(get_pmap() == other.get_pmap());
            other.flo_unary_op_node_inplace(do_merge_trees<Q,R>(alpha,beta,*this),fence);
            if (fence) world.gop.fence();
        }


        /// perform: this= alpha*f + beta*g, invoked by result

        /// f and g are reconstructed, so we can save on the compress operation,
        /// walk down the joint tree, and add leaf coefficients; effectively refines
        /// to common finest level.

        /// nothing returned, but leaves this's tree reconstructed and as sum of f and g
        /// @param[in]  alpha   prefactor for f
        /// @param[in]  f       first addend
        /// @param[in]  beta    prefactor for g
        /// @param[in]  g       second addend
        void gaxpy_oop_reconstructed(const double alpha, const implT& f,
                                     const double beta, const implT& g, const bool fence);

        /// functor for the gaxpy_inplace method
        template <typename Q, typename R>
        struct do_gaxpy_inplace {
            typedef Range<typename FunctionImpl<Q,NDIM>::dcT::const_iterator> rangeT;
            FunctionImpl<T,NDIM>* f; ///< the current function impl
            T alpha; ///< prefactor for the current function impl
            R beta; ///< prefactor for the other function impl
            do_gaxpy_inplace() {};
            do_gaxpy_inplace(FunctionImpl<T,NDIM>* f, T alpha, R beta) : f(f), alpha(alpha), beta(beta) {}
            bool operator()(typename rangeT::iterator& it) const {
                const keyT& key = it->first;
                const FunctionNode<Q,NDIM>& other_node = it->second;
                // Use send to get write accessor and automated construction if missing
                f->coeffs.send(key, &nodeT:: template gaxpy_inplace<Q,R>, alpha, other_node, beta);
                return true;
            }
            template <typename Archive>
            void serialize(Archive& ar) {}
        };

        /// Inplace general bilinear operation
        /// @param[in]  alpha   prefactor for the current function impl
        /// @param[in]  other   the other function impl
        /// @param[in]  beta    prefactor for other
        template <typename Q, typename R>
        void gaxpy_inplace(const T& alpha,const FunctionImpl<Q,NDIM>& other, const R& beta, bool fence) {
            MADNESS_ASSERT(get_pmap() == other.get_pmap());
            if (alpha != T(1.0)) scale_inplace(alpha,false);
            typedef Range<typename FunctionImpl<Q,NDIM>::dcT::const_iterator> rangeT;
            typedef do_gaxpy_inplace<Q,R> opT;
            world.taskq.for_each<rangeT,opT>(rangeT(other.coeffs.begin(), other.coeffs.end()), opT(this, T(1.0), beta));
            if (fence)
                world.gop.fence();
        }

        // loads a function impl from persistence
        // @param[in] ar   the archive where the function impl is stored
        template <typename Archive>
        void load(Archive& ar) {
            // WE RELY ON K BEING STORED FIRST
            int kk = 0;
            ar & kk;

            MADNESS_ASSERT(kk==k);

            // note that functor should not be (re)stored
            ar & thresh & initial_level & max_refine_level & truncate_mode
                & autorefine & truncate_on_project & nonstandard & compressed ; //& bc;

            ar & coeffs;
            world.gop.fence();
        }

        // saves a function impl to persistence
        // @param[in] ar   the archive where the function impl is to be stored
        template <typename Archive>
        void store(Archive& ar) {
            // WE RELY ON K BEING STORED FIRST

            // note that functor should not be (re)stored
            ar & k & thresh & initial_level & max_refine_level & truncate_mode
                & autorefine & truncate_on_project & nonstandard & compressed ; //& bc;

            ar & coeffs;
            world.gop.fence();
        }

        /// Returns true if the function is compressed.
        bool is_compressed() const;

        /// Returns true if the function is redundant.
        bool is_redundant() const;

        bool is_nonstandard() const;

        void set_functor(const std::shared_ptr<FunctionFunctorInterface<T,NDIM> > functor1);

        std::shared_ptr<FunctionFunctorInterface<T,NDIM> > get_functor();

        std::shared_ptr<FunctionFunctorInterface<T,NDIM> > get_functor() const;

        void unset_functor();

        bool& is_on_demand(); // ???????????????????? why returning reference

        const bool& is_on_demand() const; // ?????????????????????

        TensorType get_tensor_type() const;

        TensorArgs get_tensor_args() const;

        double get_thresh() const;

        void set_thresh(double value);

        bool get_autorefine() const;

        void set_autorefine(bool value);

        int get_k() const;

        const dcT& get_coeffs() const;

        dcT& get_coeffs();

        const FunctionCommonData<T,NDIM>& get_cdata() const;

        void accumulate_timer(const double time) const; // !!!!!!!!!!!!  REDUNDANT !!!!!!!!!!!!!!!

        void print_timer() const;

        void reset_timer();

        /// Adds a constant to the function.  Local operation, optional fence

        /// In scaling function basis must add value to first polynomial in
        /// each box with appropriate scaling for level.  In wavelet basis
        /// need only add at level zero.
        /// @param[in]  t   the scalar to be added
1177         void add_scalar_inplace(T t, bool fence);
1178 
1179         /// Initialize nodes to zero function at initial_level of refinement.
1180 
1181         /// Works for either basis.  No communication.
1182         void insert_zero_down_to_initial_level(const keyT& key);
1183 
1184         /// Truncate according to the threshold with optional global fence
1185 
1186         /// If thresh<=0 the default value of this->thresh is used
1187         /// @param[in]  tol   the truncation tolerance
1188         void truncate(double tol, bool fence);
1189 
1190         /// Returns true if after truncation this node has coefficients
1191 
1192         /// Assumed to be invoked on process owning key.  Possible non-blocking
1193         /// communication.
1194         /// @param[in]  key   the key of the current function node
1195         Future<bool> truncate_spawn(const keyT& key, double tol);
1196 
1197         /// Actually do the truncate operation
1198         /// @param[in] key the key to the current function node being evaluated for truncation
1199         /// @param[in] tol the tolerance for thresholding
1200         /// @param[in] v vector of Future<bool>'s that specify whether the current nodes children have coeffs
1201         bool truncate_op(const keyT& key, double tol, const std::vector< Future<bool> >& v);
1202 
1203         /// Evaluate function at quadrature points in the specified box
1204 
1205         /// @param[in] key the key indicating where the quadrature points are located
1206         /// @param[in] f the interface to the elementary function
1207         /// @param[in] qx quadrature points on a level=0 box
1208         /// @param[out] fval values
1209         void fcube(const keyT& key, const FunctionFunctorInterface<T,NDIM>& f, const Tensor<double>& qx, tensorT& fval) const;
1210 
1211         /// Evaluate function at quadrature points in the specified box
1212 
1213         /// @param[in] key the key indicating where the quadrature points are located
1214         /// @param[in] f the interface to the elementary function
1215         /// @param[in] qx quadrature points on a level=0 box
1216         /// @param[out] fval values
1217         void fcube(const keyT& key,  T (*f)(const coordT&), const Tensor<double>& qx, tensorT& fval) const;
1218 
1219         /// Returns cdata.key0
1220         const keyT& key0() const;
1221 
1222         /// Prints the coeffs tree of the current function impl
1223         /// @param[in] maxlevel the maximum level of the tree for printing
1224         /// @param[out] os the ostream to which the output is sent
1225         void print_tree(std::ostream& os = std::cout, Level maxlevel = 10000) const;
1226 
1227         /// Functor for the print_tree method
1228         void do_print_tree(const keyT& key, std::ostream& os, Level maxlevel) const;
1229 
1230         /// Prints the coeffs tree of the current function impl (using GraphViz)
1231         /// @param[in] maxlevel the maximum level of the tree for printing
1232         /// @param[out] os the ostream to which the output is sent
1233         void print_tree_graphviz(std::ostream& os = std::cout, Level maxlevel = 10000) const;
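        // Minimal usage sketch (assuming an existing FunctionImpl instance named impl;
        // "tree.dot" is an arbitrary file name, and <fstream> is required):
        //
        //   std::ofstream os("tree.dot");
        //   impl.print_tree_graphviz(os);
        //   // render e.g. with: dot -Tpdf tree.dot -o tree.pdf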
1234 
1235         /// Functor for the print_tree_graphviz method
1236         void do_print_tree_graphviz(const keyT& key, std::ostream& os, Level maxlevel) const;
1237 
1238         /// convert a number in [0,limit] to a hue color code in [0.7,0.0] (blue..red);
1239         /// if log is set the input is taken from [1.e-10,limit] on a logarithmic scale
1240         struct do_convert_to_color {
1241             double limit;
1242             bool log;
1243             static double lower() {return 1.e-10;}
1244             do_convert_to_color() {}
1245             do_convert_to_color(const double limit, const bool log) : limit(limit), log(log) {}
1246             double operator()(double val) const {
1247                 double color=0.0;
1248 
1249                 if (log) {
1250                     double val2=log10(val) - log10(lower());        // will yield >0.0
1251                     double upper=log10(limit) -log10(lower());
1252                     val2=0.7-(0.7/upper)*val2;
1253                     color= std::max(0.0,val2);
1254                     color= std::min(0.7,color);
1255                 } else {
1256                     double hue=0.7-(0.7/limit)*(val);
1257                     color= std::max(0.0,hue);
1258                 }
1259                 return color;
1260             }
1261         };
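        // Minimal usage sketch (an illustration only): map node norms onto hues in
        // [0.7,0.0] (blue..red) on a log scale with limit=1.0.
        //
        //   do_convert_to_color hue(1.0, true);
        //   double h_small = hue(1.e-3);   // ~0.21, towards the red end
        //   double h_large = hue(1.0);     // 0.0 (red) at the limit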
1262 
1263 
1264         /// Print a plane ("xy", "xz", or "yz") containing the point el2 to file
1265 
1266         /// works for all dimensions; we walk through the tree, and if a leaf node
1267         /// inside the sub-cell touches the plane we print it in pstricks format
1268         void print_plane(const std::string filename, const int xaxis, const int yaxis, const coordT& el2);
1269 
1270         /// collect the data for a plot of the MRA structure locally on each node
1271 
1272         /// @param[in]	xaxis	the x-axis in the plot (can be any axis of the MRA box)
1273         /// @param[in]	yaxis	the y-axis in the plot (can be any axis of the MRA box)
1274         /// @param[in]	el2     needs a description
1275         /// \todo Provide a description for el2
1276         Tensor<double> print_plane_local(const int xaxis, const int yaxis, const coordT& el2);
1277 
1278         /// Functor for the print_plane method
1279         /// @param[in] filename the filename for the output
1280         /// @param[in] plotinfo plotting parameters
1281         /// @param[in]	xaxis	the x-axis in the plot (can be any axis of the MRA box)
1282         /// @param[in]	yaxis	the y-axis in the plot (can be any axis of the MRA box)
1283         void do_print_plane(const std::string filename, std::vector<Tensor<double> > plotinfo,
1284                             const int xaxis, const int yaxis, const coordT el2);
1285 
1286         /// print the grid (the Gauss-Legendre quadrature points of each leaf box)
1287         /// of this function in user xyz coordinates
1288         /// @param[in] filename the filename for the output
1289         void print_grid(const std::string filename) const;
1290 
1291         /// return the keys of the local leaf boxes
1292         std::vector<keyT> local_leaf_keys() const;
1293 
1294         /// print the grid in xyz format
1295 
1296         /// the quadrature points and the key information will be written to file,
1297         /// @param[in]	filename	where the quadrature points will be written to
1298         /// @param[in]	keys		all leaf keys
1299         void do_print_grid(const std::string filename, const std::vector<keyT>& keys) const;
1300 
1301         /// read data from a grid
1302 
1303         /// @param[in]	keyfile		file with keys and grid points for each key
1304         /// @param[in]	gridfile 	file with grid points, w/o key, but with same ordering
1305         /// @param[in]	vnuc_functor	subtract the values of this functor if regularization is needed
1306         template<size_t FDIM>
1307         typename std::enable_if<NDIM==FDIM>::type
1308         read_grid(const std::string keyfile, const std::string gridfile,
1309                   std::shared_ptr< FunctionFunctorInterface<double,NDIM> > vnuc_functor) {
1310 
1311             std::ifstream kfile(keyfile.c_str());
1312             std::ifstream gfile(gridfile.c_str());
1313             std::string line;
1314 
1315             long ndata,ndata1;
1316             if (not (std::getline(kfile,line))) MADNESS_EXCEPTION("failed reading 1st line of key data",0);
1317             if (not (std::istringstream(line) >> ndata)) MADNESS_EXCEPTION("failed reading ndata",0);
1318             if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 1st line of grid data",0);
1319             if (not (std::istringstream(line) >> ndata1)) MADNESS_EXCEPTION("failed reading ndata",0);
1320             MADNESS_ASSERT(ndata==ndata1);
1321             if (not (std::getline(kfile,line))) MADNESS_EXCEPTION("failed reading 2nd line of key data",0);
1322             if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 2nd line of grid data",0);
1323 
1324             // the quadrature points in simulation coordinates of the root node
1325             const Tensor<double> qx=cdata.quad_x;
1326             const size_t npt = qx.dim(0);
1327 
1328             // the number of coordinates (grid point tuples) per box ({x1},{x2},{x3},..,{xNDIM})
1329             long npoints=power<NDIM>(npt);
1330             // the number of boxes
1331             long nboxes=ndata/npoints;
1332             MADNESS_ASSERT(nboxes*npoints==ndata);
1333             print("reading ",nboxes,"boxes from file",gridfile,keyfile);
1334 
1335             // these will be the data
1336             Tensor<T> values(cdata.vk,false);
1337 
1338             int ii=0;
1339             std::string gline,kline;
1340             //            while (1) {
1341             while (std::getline(kfile,kline)) {
1342 
1343                 double x,y,z,x1,y1,z1,val;
1344 
1345                 // get the key
1346                 long nn;
1347                 Translation l1,l2,l3;
1348                 // line looks like: # key:      n      l1   l2   l3
1349                 kline.erase(0,7);
1350                 std::stringstream(kline) >>  nn >> l1 >> l2 >> l3;
1351                 //				kfile >> s >>  nn >> l1 >> l2 >> l3;
1352                 const Vector<Translation,3> ll{ l1,l2,l3 };
1353                 Key<3> key(nn,ll);
1354 
1355                 // this is borrowed from fcube
1356                 const Vector<Translation,3>& l = key.translation();
1357                 const Level n = key.level();
1358                 const double h = std::pow(0.5,double(n));
1359                 coordT c; // will hold the point in user coordinates
1360                 const Tensor<double>& cell_width = FunctionDefaults<NDIM>::get_cell_width();
1361                 const Tensor<double>& cell = FunctionDefaults<NDIM>::get_cell();
1362 
1363 
1364                 if (NDIM == 3) {
1365                     for (int i=0; i<npt; ++i) {
1366                         c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
1367                         for (int j=0; j<npt; ++j) {
1368                             c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
1369                             for (int k=0; k<npt; ++k) {
1370                                 c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
1371                                 //								fprintf(pFile,"%18.12f %18.12f %18.12f\n",c[0],c[1],c[2]);
1372                                 auto& success1 = std::getline(gfile,gline); MADNESS_ASSERT(success1);
1373                                 auto& success2 = std::getline(kfile,kline); MADNESS_ASSERT(success2);
1374                                 std::istringstream(gline) >> x >> y >> z >> val;
1375                                 std::istringstream(kline) >> x1 >> y1 >> z1;
1376                                 MADNESS_ASSERT(std::fabs(x-c[0])<1.e-4);
1377                                 MADNESS_ASSERT(std::fabs(x1-c[0])<1.e-4);
1378                                 MADNESS_ASSERT(std::fabs(y-c[1])<1.e-4);
1379                                 MADNESS_ASSERT(std::fabs(y1-c[1])<1.e-4);
1380                                 MADNESS_ASSERT(std::fabs(z-c[2])<1.e-4);
1381                                 MADNESS_ASSERT(std::fabs(z1-c[2])<1.e-4);
1382 
1383                                 // regularize if a functor is given
1384                                 if (vnuc_functor) val-=(*vnuc_functor)(c);
1385                                 values(i,j,k)=val;
1386                             }
1387                         }
1388                     }
1389                 } else {
1390                     MADNESS_EXCEPTION("only NDIM=3 in read_grid",0);
1391                 }
1392 
1393                 // insert the new leaf node
1394                 const bool has_children=false;
1395                 coeffT coeff=coeffT(this->values2coeffs(key,values),targs);
1396                 nodeT node(coeff,has_children);
1397                 coeffs.replace(key,node);
1398                 const_cast<dcT&>(coeffs).send(key.parent(), &FunctionNode<T,NDIM>::set_has_children_recursive, coeffs, key.parent());
1399                 ii++;
1400             }
1401 
1402             kfile.close();
1403             gfile.close();
1404             MADNESS_ASSERT(ii==nboxes);
1405 
1406         }
1407 
1408 
1409         /// read data from a grid
1410 
1411         /// @param[in]	gridfile		file with keys and grid points and values for each key
1412         /// @param[in]	vnuc_functor	subtract the values of this functor if regularization is needed
1413         template<size_t FDIM>
1414         typename std::enable_if<NDIM==FDIM>::type
1415         read_grid2(const std::string gridfile,
1416                    std::shared_ptr< FunctionFunctorInterface<double,NDIM> > vnuc_functor) {
1417 
1418             std::ifstream gfile(gridfile.c_str());
1419             std::string line;
1420 
1421             long ndata;
1422             if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 1st line of grid data",0);
1423             if (not (std::istringstream(line) >> ndata)) MADNESS_EXCEPTION("failed reading ndata",0);
1424             if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 2nd line of grid data",0);
1425 
1426             // the quadrature points in simulation coordinates of the root node
1427             const Tensor<double> qx=cdata.quad_x;
1428             const size_t npt = qx.dim(0);
1429 
1430             // the number of coordinates (grid point tuples) per box ({x1},{x2},{x3},..,{xNDIM})
1431             long npoints=power<NDIM>(npt);
1432             // the number of boxes
1433             long nboxes=ndata/npoints;
1434             MADNESS_ASSERT(nboxes*npoints==ndata);
1435             print("reading ",nboxes,"boxes from file",gridfile);
1436 
1437             // these will be the data
1438             Tensor<T> values(cdata.vk,false);
1439 
1440             int ii=0;
1441             std::string gline;
1442             //            while (1) {
1443             while (std::getline(gfile,gline)) {
1444 
1445                 double x1,y1,z1,val;
1446 
1447                 // get the key
1448                 long nn;
1449                 Translation l1,l2,l3;
1450                 // line looks like: # key:      n      l1   l2   l3
1451                 gline.erase(0,7);
1452                 std::stringstream(gline) >>  nn >> l1 >> l2 >> l3;
1453                 const Vector<Translation,3> ll{ l1,l2,l3 };
1454                 Key<3> key(nn,ll);
1455 
1456                 // this is borrowed from fcube
1457                 const Vector<Translation,3>& l = key.translation();
1458                 const Level n = key.level();
1459                 const double h = std::pow(0.5,double(n));
1460                 coordT c; // will hold the point in user coordinates
1461                 const Tensor<double>& cell_width = FunctionDefaults<NDIM>::get_cell_width();
1462                 const Tensor<double>& cell = FunctionDefaults<NDIM>::get_cell();
1463 
1464 
1465                 if (NDIM == 3) {
1466                     for (int i=0; i<npt; ++i) {
1467                         c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
1468                         for (int j=0; j<npt; ++j) {
1469                             c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
1470                             for (int k=0; k<npt; ++k) {
1471                                 c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
1472 
1473                                 auto& success = std::getline(gfile,gline);
1474                                 MADNESS_ASSERT(success);
1475                                 std::istringstream(gline) >> x1 >> y1 >> z1 >> val;
1476                                 MADNESS_ASSERT(std::fabs(x1-c[0])<1.e-4);
1477                                 MADNESS_ASSERT(std::fabs(y1-c[1])<1.e-4);
1478                                 MADNESS_ASSERT(std::fabs(z1-c[2])<1.e-4);
1479 
1480                                 // regularize if a functor is given
1481                                 if (vnuc_functor) val-=(*vnuc_functor)(c);
1482                                 values(i,j,k)=val;
1483                             }
1484                         }
1485                     }
1486                 } else {
1487                     MADNESS_EXCEPTION("only NDIM=3 in read_grid2",0);
1488                 }
1489 
1490                 // insert the new leaf node
1491                 const bool has_children=false;
1492                 coeffT coeff=coeffT(this->values2coeffs(key,values),targs);
1493                 nodeT node(coeff,has_children);
1494                 coeffs.replace(key,node);
1495                 const_cast<dcT&>(coeffs).send(key.parent(),
1496                                               &FunctionNode<T,NDIM>::set_has_children_recursive,
1497                                               coeffs, key.parent());
1498                 ii++;
1499             }
1500 
1501             gfile.close();
1502             MADNESS_ASSERT(ii==nboxes);
1503 
1504         }
1505 
1506 
1507         /// Compute by projection the scaling function coeffs in specified box
1508         /// @param[in] key the key to the current function node (box)
1509         tensorT project(const keyT& key) const;
1510 
1511         /// Returns the truncation threshold according to truncate_method
1512 
1513         /// the heuristic argument: this threshold gives each FunctionNode an error of
1514         /// less than tol.  The total error can then be as high as sqrt(#nodes) * tol.
1515         /// Therefore, in order to account for higher dimensions, we divide tol by roughly
1516         /// the square root of the number of siblings (2^NDIM) that acquire a large error
1517         /// when we refine along a deep branch of the tree.
1519         double truncate_tol(double tol, const keyT& key) const;
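        // In symbols: with N leaf nodes each carrying an error of at most tol_node, the
        // total error is bounded by roughly sqrt(N)*tol_node, so refining a box into its
        // 2^NDIM children suggests shrinking the per-node tolerance by about sqrt(2^NDIM).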
1520 
1521 
1522         /// Returns patch referring to coeffs of child in parent box
1523         /// @param[in] child the key to the child function node (box)
1524         std::vector<Slice> child_patch(const keyT& child) const;
1525 
1526         /// Projection with optional refinement w/ special points
1527         /// @param[in] key the key to the current function node (box)
1528         /// @param[in] do_refine should we continue refinement?
1529         /// @param[in] specialpts vector of special points in the function where we need
1530         ///            to refine at a much finer level
1531         void project_refine_op(const keyT& key, bool do_refine,
1532                                const std::vector<Vector<double,NDIM> >& specialpts);
1533 
1534         /// Compute the Legendre scaling functions for multiplication
1535 
1536         /// Evaluate the parent polynomials at the quadrature points of a child.  The prefactor
1537         /// of 2^(n/2) is included.  The tensor must be preallocated as phi(k,npt).
1538         /// Refer to the implementation notes for more info.
1539         /// @todo Robert please verify this comment. I don't understand this method.
1540         /// @param[in] np level of the parent function node (box)
1541         /// @param[in] nc level of the child function node (box)
1542         /// @param[in] lp translation of the parent function node (box)
1543         /// @param[in] lc translation of the child function node (box)
1544         /// @param[out] phi tensor of the Legendre scaling functions
1545         void phi_for_mul(Level np, Translation lp, Level nc, Translation lc, Tensor<double>& phi) const;
1546 
1547         /// Directly project parent coeffs to child coeffs
1548 
1549         /// Currently used by diff, but other uses can be anticipated
1550 
1551         /// @todo is this documentation correct?
1552         /// @param[in]	child	the key whose coeffs we are requesting
1553         /// @param[in]	parent	the (leaf) key of our function
1554         /// @param[in]	s	the (leaf) coeffs belonging to parent
1555         /// @return 	coeffs
1556         const coeffT parent_to_child(const coeffT& s, const keyT& parent, const keyT& child) const;
1557 
1558         /// Directly project parent NS coeffs to child NS coeffs
1559 
1560         /// return the NS coefficients if parent and child are the same,
1561         /// or construct sum coeffs from the parents and "add" zero wavelet coeffs
1562         /// @param[in]	child	the key whose coeffs we are requesting
1563         /// @param[in]	parent	the (leaf) key of our function
1564         /// @param[in]	coeff	the (leaf) coeffs belonging to parent
1565         /// @return 	coeffs in NS form
1566         coeffT parent_to_child_NS(const keyT& child, const keyT& parent,
1567                                   const coeffT& coeff) const;
1568 
1569         /// Get the scaling function coeffs at level n starting from NS form
1570         // N=2^n, M=N/q, q must be a power of 2
1571         // q=0 return coeffs [N,k] for direct sum
1572         // q>0 return coeffs [k,q,M] for fft sum
1573         tensorT coeffs_for_jun(Level n, long q=0);
1574 
1575         /// Return the values when given the coeffs in scaling function basis
1576         /// @param[in] key the key of the function node (box)
1577         /// @param[in] coeff the tensor of scaling function coefficients for function node (box)
1578         /// @return function values for function node (box)
1579         template <typename Q>
1580         GenTensor<Q> coeffs2values(const keyT& key, const GenTensor<Q>& coeff) const {
1581             // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1582             double scale = pow(2.0,0.5*NDIM*key.level())/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1583             return transform(coeff,cdata.quad_phit).scale(scale);
1584         }
1585 
1586         /// convert S or NS coeffs to values on a 2k grid of the children
1587 
1588         /// equivalent to unfiltering the NS coeffs and then converting all child S-coeffs
1589         /// to values in their respective boxes. If only S coeffs are provided d coeffs are
1590         /// assumed to be zero. Reverse operation to values2NScoeffs().
1591         /// @param[in]	key	the key of the current S or NS coeffs, level n
1592         /// @param[in]	coeff coeffs in S or NS form; if S then d coeffs are assumed zero
1593         /// @param[in]	s_only	sanity check to avoid unintended discard of d coeffs
1594         /// @return		function values on the quadrature points of the children of key (!)
1595         template <typename Q>
1596         GenTensor<Q> NScoeffs2values(const keyT& key, const GenTensor<Q>& coeff,
1597                                      const bool s_only) const {
1598             // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1599 
1600             // sanity checks
1601             MADNESS_ASSERT((coeff.dim(0)==this->get_k()) == s_only);
1602             MADNESS_ASSERT((coeff.dim(0)==this->get_k()) or (coeff.dim(0)==2*this->get_k()));
1603 
1604             // this is a block-diagonal matrix with the quadrature points on the diagonal
1605             Tensor<double> quad_phit_2k(2*cdata.k,2*cdata.npt);
1606             quad_phit_2k(cdata.s[0],cdata.s[0])=cdata.quad_phit;
1607             quad_phit_2k(cdata.s[1],cdata.s[1])=cdata.quad_phit;
1608 
1609             // the transformation matrix unfilters (cdata.hg) and transforms to values in one step
1610             const Tensor<double> transf = (s_only)
1611                 ? inner(cdata.hg(Slice(0,k-1),_),quad_phit_2k)	// S coeffs
1612                 : inner(cdata.hg,quad_phit_2k);					// NS coeffs
1613 
1614             // increment the level since the coeffs2values part happens on level n+1
1615             const double scale = pow(2.0,0.5*NDIM*(key.level()+1))/
1616                 sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1617 
1618             return transform(coeff,transf).scale(scale);
1619         }
1620 
1621         /// Compute the function values for multiplication
1622 
1623         /// Given S or NS coefficients from a parent cell, compute the value of
1624         /// the functions at the quadrature points of a child
1625         /// currently restricted to special cases
1626         /// @param[in]	child	key of the box in which we compute values
1627         /// @param[in]	parent	key of the parent box holding the coeffs
1628         ///	@param[in]	coeff	coeffs of the parent box
1629         /// @param[in]	s_only	sanity check to avoid unintended discard of d coeffs
1630         /// @return		function values on the quadrature points of the children of child (!)
1631         template <typename Q>
1632         GenTensor<Q> NS_fcube_for_mul(const keyT& child, const keyT& parent,
1633                                       const GenTensor<Q>& coeff, const bool s_only) const {
1634             // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1635 
1636             // sanity checks
1637             MADNESS_ASSERT((coeff.dim(0)==this->get_k()) == s_only);
1638             MADNESS_ASSERT((coeff.dim(0)==this->get_k()) or (coeff.dim(0)==2*this->get_k()));
1639 
1640             // fast return if possible
1641             //            if (child.level()==parent.level()) return NScoeffs2values(child,coeff,s_only);
1642 
1643             if (s_only) {
1644 
1645                 Tensor<double> quad_phi[NDIM];
1646                 // tmp tensor
1647                 Tensor<double> phi1(cdata.k,cdata.npt);
1648 
1649                 for (std::size_t d=0; d<NDIM; ++d) {
1650 
1651                     // input is S coeffs (dimension k), output is values on 2*npt grid points
1652                     quad_phi[d]=Tensor<double>(cdata.k,2*cdata.npt);
1653 
1654                     // for both children of "child" evaluate the Legendre polynomials
1655                     // first the left child on level n+1 and translations 2l
1656                     phi_for_mul(parent.level(),parent.translation()[d],
1657                                 child.level()+1, 2*child.translation()[d], phi1);
1658                     quad_phi[d](_,Slice(0,k-1))=phi1;
1659 
1660                     // next the right child on level n+1 and translations 2l+1
1661                     phi_for_mul(parent.level(),parent.translation()[d],
1662                                 child.level()+1, 2*child.translation()[d]+1, phi1);
1663                     quad_phi[d](_,Slice(k,2*k-1))=phi1;
1664                 }
1665 
1666                 const double scale = 1.0/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1667                 return general_transform(coeff,quad_phi).scale(scale);
1668             }
1669             MADNESS_EXCEPTION("you should not be here in NS_fcube_for_mul",1);
1670             return GenTensor<Q>();
1671         }
1672 
1673         /// convert the function values of a child generation directly to NS coeffs
1674 
1675         /// equivalent to converting the function values to 2^NDIM S coeffs and then
1676         /// filtering them to NS coeffs. Reverse operation to NScoeffs2values().
1677         /// @param[in]	key		key of the parent of the generation
1678         /// @param[in]	values	tensor holding function values of the 2^NDIM children of key
1679         /// @return		NS coeffs belonging to key
1680         template <typename Q>
1681         GenTensor<Q> values2NScoeffs(const keyT& key, const GenTensor<Q>& values) const {
1682             //PROFILE_MEMBER_FUNC(FunctionImpl);  // Too fine grain for routine profiling
1683 
1684             // sanity checks
1685             MADNESS_ASSERT(values.dim(0)==2*this->get_k());
1686 
1687             // this is a block-diagonal matrix with the quadrature points on the diagonal
1688             Tensor<double> quad_phit_2k(2*cdata.npt,2*cdata.k);
1689             quad_phit_2k(cdata.s[0],cdata.s[0])=cdata.quad_phiw;
1690             quad_phit_2k(cdata.s[1],cdata.s[1])=cdata.quad_phiw;
1691 
1692             // the transformation matrix converts the values to S coeffs of the children and filters (cdata.hgT) to NS coeffs in one step
1693             const Tensor<double> transf=inner(quad_phit_2k,cdata.hgT);
1694 
1695             // increment the level since the values2coeffs part happens on level n+1
1696             const double scale = pow(0.5,0.5*NDIM*(key.level()+1))
1697                 *sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1698 
1699             return transform(values,transf).scale(scale);
1700         }
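        // Note: the scale factor used here, pow(0.5,0.5*NDIM*(n+1))*sqrt(cell_volume), is
        // the reciprocal of the one used in NScoeffs2values() above, so the two routines
        // are inverses of each other up to the quadrature accuracy.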
1701 
1702         /// Return the function values at the quadrature points when given the scaling function coeffs
1703         /// @param[in] key the key of the function node (box)
1704         /// @return function values for function node (box)
1705         template <typename Q>
1706         Tensor<Q> coeffs2values(const keyT& key, const Tensor<Q>& coeff) const {
1707             // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1708             double scale = pow(2.0,0.5*NDIM*key.level())/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1709             return transform(coeff,cdata.quad_phit).scale(scale);
1710         }
1711 
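        /// Return the scaling function coeffs when given the function values at the quadrature points
        /// @param[in] key the key of the function node (box)
        /// @param[in] values the function values at the quadrature points
        /// @return the scaling function coefficients for the function node (box)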
1712         template <typename Q>
1713         GenTensor<Q> values2coeffs(const keyT& key, const GenTensor<Q>& values) const {
1714             // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1715             double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1716             return transform(values,cdata.quad_phiw).scale(scale);
1717         }
1718 
1719         template <typename Q>
1720         Tensor<Q> values2coeffs(const keyT& key, const Tensor<Q>& values) const {
1721             // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1722             double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1723             return transform(values,cdata.quad_phiw).scale(scale);
1724         }
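        // Round-trip sketch (an illustration only; impl, key and node are assumed to refer
        // to an existing FunctionImpl and one of its local key/node pairs): values2coeffs()
        // inverts coeffs2values() up to the quadrature accuracy, since quad_phiw carries the
        // Gauss-Legendre weights and the two scale factors are reciprocals of each other.
        //
        //   tensorT s   = node.coeff().full_tensor_copy();
        //   tensorT val = impl.coeffs2values(key, s);
        //   tensorT s2  = impl.values2coeffs(key, val);   // s2 ~= s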
1725 
1726         /// Compute the function values for multiplication
1727 
1728         /// Given coefficients from a parent cell, compute the value of
1729         /// the functions at the quadrature points of a child
1730         /// @param[in] child the key for the child function node (box)
1731         /// @param[in] parent the key for the parent function node (box)
1732         /// @param[in] coeff the coefficients of scaling function basis of the parent box
1733         template <typename Q>
1734         Tensor<Q> fcube_for_mul(const keyT& child, const keyT& parent, const Tensor<Q>& coeff) const {
1735             // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1736             if (child.level() == parent.level()) {
1737                 return coeffs2values(parent, coeff);
1738             }
1739             else if (child.level() < parent.level()) {
1740                 MADNESS_EXCEPTION("FunctionImpl: fcube_for_mul: child-parent relationship bad?",0);
1741             }
1742             else {
1743                 Tensor<double> phi[NDIM];
1744                 for (std::size_t d=0; d<NDIM; ++d) {
1745                     phi[d] = Tensor<double>(cdata.k,cdata.npt);
1746                     phi_for_mul(parent.level(),parent.translation()[d],
1747                                 child.level(), child.translation()[d], phi[d]);
1748                 }
1749                 return general_transform(coeff,phi).scale(1.0/sqrt(FunctionDefaults<NDIM>::get_cell_volume()));
1750             }
1751         }
1752 
1753 
1754         /// Compute the function values for multiplication
1755 
1756         /// Given coefficients from a parent cell, compute the value of
1757         /// the functions at the quadrature points of a child
1758         /// @param[in] child the key for the child function node (box)
1759         /// @param[in] parent the key for the parent function node (box)
1760         /// @param[in] coeff the coefficients of scaling function basis of the parent box
1761         template <typename Q>
1762         GenTensor<Q> fcube_for_mul(const keyT& child, const keyT& parent, const GenTensor<Q>& coeff) const {
1763             // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1764             if (child.level() == parent.level()) {
1765                 return coeffs2values(parent, coeff);
1766             }
1767             else if (child.level() < parent.level()) {
1768                 MADNESS_EXCEPTION("FunctionImpl: fcube_for_mul: child-parent relationship bad?",0);
1769             }
1770             else {
1771                 Tensor<double> phi[NDIM];
1772                 for (size_t d=0; d<NDIM; d++) {
1773                     phi[d] = Tensor<double>(cdata.k,cdata.npt);
1774                     phi_for_mul(parent.level(),parent.translation()[d],
1775                                 child.level(), child.translation()[d], phi[d]);
1776                 }
1777                 return general_transform(coeff,phi).scale(1.0/sqrt(FunctionDefaults<NDIM>::get_cell_volume()));
1778             }
1779         }
1780 
1781 
1782         /// Functor for the mul method
1783         template <typename L, typename R>
1784         void do_mul(const keyT& key, const Tensor<L>& left, const std::pair< keyT, Tensor<R> >& arg) {
1785             // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1786             const keyT& rkey = arg.first;
1787             const Tensor<R>& rcoeff = arg.second;
1788             //madness::print("do_mul: r", rkey, rcoeff.size());
1789             Tensor<R> rcube = fcube_for_mul(key, rkey, rcoeff);
1790             //madness::print("do_mul: l", key, left.size());
1791             Tensor<L> lcube = fcube_for_mul(key, key, left);
1792 
1793             Tensor<T> tcube(cdata.vk,false);
1794             TERNARY_OPTIMIZED_ITERATOR(T, tcube, L, lcube, R, rcube, *_p0 = *_p1 * *_p2;);
1795             double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1796             tcube = transform(tcube,cdata.quad_phiw).scale(scale);
1797             coeffs.replace(key, nodeT(coeffT(tcube,targs),false));
1798         }
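        // Note on do_mul: the product is formed pointwise on the quadrature grid of key
        // (lcube*rcube), and the final scale/transform lines are values2coeffs(key,tcube)
        // written out inline before the result node is inserted with no children.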
1799 
1800 
1801         /// multiply the values of two coefficient tensors using a custom number of grid points
1802 
1803         /// note both coefficient tensors have to refer to the same key!
1804         /// @param[in]	c1	a tensor holding coefficients
1805         /// @param[in]	c2	another tensor holding coeffs
1806         /// @param[in]	npt	number of grid points (optional, default is cdata.npt)
1807         /// @return		coefficient tensor holding the product of the values of c1 and c2
1808         template<typename R>
1809         Tensor<TENSOR_RESULT_TYPE(T,R)> mul(const Tensor<T>& c1, const Tensor<R>& c2,
1810                                             const int npt, const keyT& key) const {
1811             typedef TENSOR_RESULT_TYPE(T,R) resultT;
1812 
1813             const FunctionCommonData<T,NDIM>& cdata2=FunctionCommonData<T,NDIM>::get(npt);
1814 
1815             // construct tensors in the larger basis (npt coefficients per dimension) and embed the k coeffs
1816             Tensor<T> c11(cdata2.vk), c22(cdata2.vk);
1817             c11(this->cdata.s0)=c1;
1818             c22(this->cdata.s0)=c2;
1819 
1820             // it's sufficient to scale once
1821             double scale = pow(2.0,0.5*NDIM*key.level())/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1822             Tensor<T> c1value=transform(c11,cdata2.quad_phit).scale(scale);
1823             Tensor<R> c2value=transform(c22,cdata2.quad_phit);
1824             Tensor<resultT> resultvalue(cdata2.vk,false);
1825             TERNARY_OPTIMIZED_ITERATOR(resultT, resultvalue, T, c1value, R, c2value, *_p0 = *_p1 * *_p2;);
1826 
1827             Tensor<resultT> result=transform(resultvalue,cdata2.quad_phiw);
1828 
1829             // return a copy of the slice to have the tensor contiguous
1830             return copy(result(this->cdata.s0));
1831         }
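        // Usage sketch (an illustration only; c1, c2 and key are assumed to belong to the
        // same box of this impl): form the product on an oversampled grid, e.g. with twice
        // the default number of quadrature points, to reduce the aliasing error of the
        // product.
        //
        //   Tensor<T> prod = impl.mul(c1, c2, 2*impl.get_cdata().npt, key);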
1832 
1833 
1834         /// Functor for the binary_op method
1835         template <typename L, typename R, typename opT>
1836         void do_binary_op(const keyT& key, const Tensor<L>& left,
1837                           const std::pair< keyT, Tensor<R> >& arg,
1838                           const opT& op) {
1839             //PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
1840             const keyT& rkey = arg.first;
1841             const Tensor<R>& rcoeff = arg.second;
1842             Tensor<R> rcube = fcube_for_mul(key, rkey, rcoeff);
1843             Tensor<L> lcube = fcube_for_mul(key, key, left);
1844 
1845             Tensor<T> tcube(cdata.vk,false);
1846             op(key, tcube, lcube, rcube);
1847             double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1848             tcube = transform(tcube,cdata.quad_phiw).scale(scale);
1849             coeffs.replace(key, nodeT(coeffT(tcube,targs),false));
1850         }
1851 
1852         /// Invoked by result to perform result += alpha*left+beta*right in wavelet basis
1853 
1854         /// Does not assume that any of result, left, right have the same distribution.
1855         /// For most purposes result will start out empty, so we are actually implementing an
1856         /// out-of-place gaxpy.  If all functions have the same distribution there is
1857         /// no communication except for the optional fence.
1858         template <typename L, typename R>
1859         void gaxpy(T alpha, const FunctionImpl<L,NDIM>& left,
1860                    T beta, const FunctionImpl<R,NDIM>& right, bool fence) {
1861             // Loop over local nodes in both functions.  Add in alpha*left and beta*right.
1862             // Not that efficient in terms of memory bandwidth but ensures we do
1863             // not miss any nodes.
1864             typename FunctionImpl<L,NDIM>::dcT::const_iterator left_end = left.coeffs.end();
1865             for (typename FunctionImpl<L,NDIM>::dcT::const_iterator it=left.coeffs.begin();
1866                  it!=left_end;
1867                  ++it) {
1868                 const keyT& key = it->first;
1869                 const typename FunctionImpl<L,NDIM>::nodeT& other_node = it->second;
1870                 coeffs.send(key, &nodeT:: template gaxpy_inplace<T,L>, 1.0, other_node, alpha);
1871             }
1872             typename FunctionImpl<R,NDIM>::dcT::const_iterator right_end = right.coeffs.end();
1873             for (typename FunctionImpl<R,NDIM>::dcT::const_iterator it=right.coeffs.begin();
1874                  it!=right_end;
1875                  ++it) {
1876                 const keyT& key = it->first;
1877                 const typename FunctionImpl<R,NDIM>::nodeT& other_node = it->second;
1878                 coeffs.send(key, &nodeT:: template gaxpy_inplace<T,R>, 1.0, other_node, beta);
1879             }
1880             if (fence)
1881                 world.gop.fence();
1882         }
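        // Minimal sketch (an illustration only; result, left_impl and right_impl are assumed
        // to be compatible FunctionImpl objects in the wavelet basis):
        //
        //   result.gaxpy(2.0, left_impl, -1.0, right_impl, /*fence=*/true);
        //   // result now holds 2*left - 1*right in the wavelet basis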
1883 
1884         /// Unary operation applied inplace to the coefficients WITHOUT refinement, optional fence
1885         /// @param[in] op the unary operator for the coefficients
1886         template <typename opT>
1887         void unary_op_coeff_inplace(const opT& op, bool fence) {
1888             typename dcT::iterator end = coeffs.end();
1889             for (typename dcT::iterator it=coeffs.begin(); it!=end; ++it) {
1890                 const keyT& parent = it->first;
1891                 nodeT& node = it->second;
1892                 if (node.has_coeff()) {
1893                     //                    op(parent, node.coeff());
1894                     TensorArgs full(-1.0,TT_FULL);
1895                     change_tensor_type(node.coeff(),full);
1896                     op(parent, node.coeff().full_tensor());
1897                     change_tensor_type(node.coeff(),targs);
1898                     //                	op(parent,node);
1899                 }
1900             }
1901             if (fence)
1902                 world.gop.fence();
1903         }
1904 
1905         /// Unary operation applied inplace to the nodes WITHOUT refinement, optional fence
1906         /// @param[in] op the unary operator for the nodes, called as op(key, node)
1907         template <typename opT>
1908         void unary_op_node_inplace(const opT& op, bool fence) {
1909             typename dcT::iterator end = coeffs.end();
1910             for (typename dcT::iterator it=coeffs.begin(); it!=end; ++it) {
1911                 const keyT& parent = it->first;
1912                 nodeT& node = it->second;
1913                 op(parent, node);
1914             }
1915             if (fence)
1916                 world.gop.fence();
1917         }
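        // Example op (a sketch only, assuming a FunctionImpl instance named impl): any
        // callable with the signature void(const keyT&, nodeT&) can be passed, e.g. to
        // scale every local node's coefficients in place, fencing afterwards.
        //
        //   struct scale_op {
        //       double a;
        //       void operator()(const keyT& key, nodeT& node) const {
        //           if (node.has_coeff()) node.coeff().scale(a);
        //       }
        //   };
        //   impl.unary_op_node_inplace(scale_op{2.0}, true);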
1918 
1919         /// Integrate over one particle of a two particle function to obtain a one particle function
1920         /// e.g. \int g(1,2) \delta(2-1) d2 = f(1)
1921         /// The overall dimension of g must be even
1922 
1923         /// The per-node operator used by do_dirac_convolution()
1924         template<std::size_t LDIM>
1925         void dirac_convolution_op(const keyT& key, const nodeT& node, FunctionImpl<T,LDIM>* f) const {
1926 			// fast return if the node has children (not a leaf node)
1927 			if(node.has_children()) return;
1928 
1929 			const implT* g=this;
1930 
1931 			// break the 6D key into two 3D keys (may also work for every even dimension)
1932 			Key<LDIM> key1, key2;
1933 			key.break_apart(key1,key2);
1934 
1935 			// get the coefficients of the 6D function g
1936 			const coeffT& g_coeff = node.coeff();
1937 
1938 			// get the values of the 6D function g
1939 			coeffT g_values = g->coeffs2values(key,g_coeff);
1940 
1941 			// Determine rank and k
1942 			const long rank=g_values.rank();
1943 			const long maxk=f->get_k();
1944 			MADNESS_ASSERT(maxk==g_coeff.dim(0));
1945 
1946 			// get tensors for particle 1 and 2 (U and V in SVD)
1947 			tensorT vec1=copy(g_values.config().ref_vector(0).reshape(rank,maxk,maxk,maxk));
1948 			tensorT vec2=g_values.config().ref_vector(1).reshape(rank,maxk,maxk,maxk);
1949 			tensorT result(maxk,maxk,maxk);  // should give zero tensor
1950 			// Multiply the values of each U and V vector
1951 			for (long i=0; i<rank; ++i) {
1952 				tensorT c1=vec1(Slice(i,i),_,_,_); // shallow copy (!)
1953 				tensorT c2=vec2(Slice(i,i),_,_,_);
1954 				c1.emul(c2); // this changes vec1 because of shallow copy, but not the g function because of the deep copy made above
1955 				double singular_value_i = g_values.config().weights(i);
1956 				result += (singular_value_i*c1);
1957 			}
1958 
1959             // accumulate the coefficients (since only diagonal boxes are used the coefficients are simply replaced, but accumulate is needed to create the right tree structure)
1960 			tensorT f_coeff = f->values2coeffs(key1,result);
1961 			f->coeffs.task(key1, &FunctionNode<T,LDIM>::accumulate2, f_coeff, f->coeffs, key1, TaskAttributes::hipri());
1962 //             coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest, TaskAttributes::hipri());
1963 
1964 
1965 			return;
1966 		}
1967 
1968 
1969         template<std::size_t LDIM>
1970         void do_dirac_convolution(FunctionImpl<T,LDIM>* f, bool fence) const {
1971             typename dcT::const_iterator end = this->coeffs.end();
1972             for (typename dcT::const_iterator it=this->coeffs.begin(); it!=end; ++it) {
1973                 // looping through all the leaf(!) coefficients in the NDIM function ("this")
1974                 const keyT& key = it->first;
1975                 const FunctionNode<T,NDIM>& node = it->second;
1976                 if (node.is_leaf()) {
1977                 	// only process the diagonal boxes
1978         			Key<LDIM> key1, key2;
1979         			key.break_apart(key1,key2);
1980         			if(key1 == key2){
1981                         ProcessID p = coeffs.owner(key);
1982                         woT::task(p, &implT:: template dirac_convolution_op<LDIM>, key, node, f);
1983         			}
1984         		}
1985             }
1986             world.gop.fence(); // fence is necessary if trickle down is used afterwards
1987             // trickle_down and undo_redundant shouldn't change anything if only the diagonal elements are considered above -- check this
1988             f->trickle_down(true); // fence must be true otherwise undo_redundant will have trouble
1989             f->undo_redundant(true);
1990             f->verify_tree();
1991             //if (fence) world.gop.fence(); // unnecessary, fence is activated in undo_redundant
1992 
1993         }
1994 
1995 
1996         /// Unary operation applied inplace to the coefficients WITHOUT refinement, optional fence
1997         /// @param[in] op the unary operator for the coefficients
1998         template <typename opT>
1999         void flo_unary_op_node_inplace(const opT& op, bool fence) {
2000             typedef Range<typename dcT::iterator> rangeT;
2001 //            typedef do_unary_op_value_inplace<opT> xopT;
2002             world.taskq.for_each<rangeT,opT>(rangeT(coeffs.begin(), coeffs.end()), op);
2003             if (fence) world.gop.fence();
2004         }
2005 
2006         /// Unary operation applied inplace to the coefficients WITHOUT refinement, optional fence
2007         /// @param[in] op the unary operator for the coefficients
2008         template <typename opT>
2009         void flo_unary_op_node_inplace(const opT& op, bool fence) const {
2010             typedef Range<typename dcT::const_iterator> rangeT;
2011 //            typedef do_unary_op_value_inplace<opT> xopT;
2012             world.taskq.for_each<rangeT,opT>(rangeT(coeffs.begin(), coeffs.end()), op);
2013             if (fence)
2014                 world.gop.fence();
2015         }
2016 
2017         /// truncate tree at a certain level
2018         /// @param[in] max_level truncate tree below this level
2019         void erase(const Level& max_level);
2020 
2021         /// Returns a measure of the asymmetry under particle exchange ... no communication
2022         double check_symmetry_local() const;
2023 
2024         /// given an NS tree resulting from a convolution, truncate the leaf nodes if appropriate
2025         struct do_truncate_NS_leafs {
2026             typedef Range<typename dcT::iterator> rangeT;
2027             const implT* f;     // for calling its member functions
2028 
2029             do_truncate_NS_leafs(const implT* f) : f(f) {}
2030 
2031             bool operator()(typename rangeT::iterator& it) const {
2032 
2033                 const keyT& key = it->first;
2034                 nodeT& node = it->second;
2035 
2036                 if (node.is_leaf() and node.coeff().has_data()) {
2037                     coeffT d = copy(node.coeff());
2038                     d(f->cdata.s0)=0.0;
2039                     const double error=d.normf();
2040                     const double tol=f->truncate_tol(f->get_thresh(),key);
2041                     if (error<tol) node.coeff()=copy(node.coeff()(f->cdata.s0));
2042                 }
2043                 return true;
2044             }
2045             template <typename Archive> void serialize(const Archive& ar) {}
2046 
2047         };
2048 
2049         /// remove all coefficients of internal nodes
2050         /// presumably to switch from redundant to reconstructed state
2051         struct remove_internal_coeffs {
2052             typedef Range<typename dcT::iterator> rangeT;
2053 
2054             /// constructor
2055             remove_internal_coeffs() {}
2056 
2057             bool operator()(typename rangeT::iterator& it) const {
2058 
2059                 nodeT& node = it->second;
2060                 if (node.has_children()) node.clear_coeff();
2061                 return true;
2062             }
2063             template <typename Archive> void serialize(const Archive& ar) {}
2064 
2065         };
2066 
2067 
2068         /// keep only the sum coefficients in each node
2069         struct do_keep_sum_coeffs {
2070             typedef Range<typename dcT::iterator> rangeT;
2071             implT* impl;
2072 
2073             /// constructor needs impl for cdata
2074             do_keep_sum_coeffs(implT* impl) : impl(impl) {}
2075 
2076             bool operator()(typename rangeT::iterator& it) const {
2077 
2078                 nodeT& node = it->second;
2079                 coeffT s=copy(node.coeff()(impl->cdata.s0));
2080                 node.coeff()=s;
2081                 return true;
2082             }
2083             template <typename Archive> void serialize(const Archive& ar) {}
2084 
2085         };
2086 
2087 
2088         /// reduce the rank of the nodes, optional fence
2089         struct do_reduce_rank {
2090             typedef Range<typename dcT::iterator> rangeT;
2091 
2092             // threshold for rank reduction / SVD truncation
2093             TensorArgs args;
2094 
2095             // constructor takes target precision
2096             do_reduce_rank() {}
2097             do_reduce_rank(const TensorArgs& targs) : args(targs) {}
2098             do_reduce_rank(const double& thresh) {
2099                 args.thresh=thresh;
2100             }
2101 
2102             //
2103             bool operator()(typename rangeT::iterator& it) const {
2104 
2105                 nodeT& node = it->second;
2106                 node.reduceRank(args.thresh);
2107                 return true;
2108             }
2109             template <typename Archive> void serialize(const Archive& ar) {}
2110         };
2111 
2112 
2113 
2114         /// check symmetry wrt particle exchange
2115         struct do_check_symmetry_local {
2116             typedef Range<typename dcT::const_iterator> rangeT;
2117             const implT* f;
2118             do_check_symmetry_local() {}
2119             do_check_symmetry_local(const implT& f) : f(&f) {}
2120 
2121             /// return the norm of the difference of this node and its "mirror" node
2122             double operator()(typename rangeT::iterator& it) const {
2123 
2124                 const keyT& key = it->first;
2125                 const nodeT& fnode = it->second;
2126 
2127                 // skip internal nodes
2128                 if (fnode.has_children()) return 0.0;
2129 
2130                 if (f->world.size()>1) return 0.0;
2131 
2132                 // exchange the two particles (assumes NDIM==6)
2133                 std::vector<long> map(NDIM);
2134                 map[0]=3; map[1]=4; map[2]=5;
2135                 map[3]=0; map[4]=1; map[5]=2;
2136 
2137                 // make mapped key
2138                 Vector<Translation,NDIM> l;
2139                 for (std::size_t i=0; i<NDIM; ++i) l[map[i]] = key.translation()[i];
2140                 const keyT mapkey(key.level(),l);
2141 
2142                 double norm=0.0;
2143 
2144 
2145                 // hope it's local
2146                 if (f->get_coeffs().probe(mapkey)) {
2147                     MADNESS_ASSERT(f->get_coeffs().probe(mapkey));
2148                     const nodeT& mapnode=f->get_coeffs().find(mapkey).get()->second;
2149 
2150                     bool have_c1=fnode.coeff().has_data() and fnode.coeff().config().has_data();
2151                     bool have_c2=mapnode.coeff().has_data() and mapnode.coeff().config().has_data();
2152 
2153                     if (have_c1 and have_c2) {
2154                         tensorT c1=fnode.coeff().full_tensor_copy();
2155                         tensorT c2=mapnode.coeff().full_tensor_copy();
2156                         c2 = copy(c2.mapdim(map));
2157                         norm=(c1-c2).normf();
2158                     } else if (have_c1) {
2159                         tensorT c1=fnode.coeff().full_tensor_copy();
2160                         norm=c1.normf();
2161                     } else if (have_c2) {
2162                         tensorT c2=mapnode.coeff().full_tensor_copy();
2163                         norm=c2.normf();
2164                     } else {
2165                         norm=0.0;
2166                     }
2167                 } else {
2168                     norm=fnode.coeff().normf();
2169                 }
2170                 return norm*norm;
2171             }
2172 
2173             double operator()(double a, double b) const {
2174                 return (a+b);
2175             }
2176 
2177             template <typename Archive> void serialize(const Archive& ar) {
2178                 MADNESS_EXCEPTION("no serialization of do_check_symmetry yet",1);
2179             }
2180 
2181 
2182         };
2183 
2184         /// merge the coefficient boxes of this into other's tree
2185 
2186         /// no comm, and the tree should be in a consistent state by virtue
2187         /// of FunctionNode::gaxpy_inplace
2188         template<typename Q, typename R>
2189         struct do_merge_trees {
2190             typedef Range<typename dcT::const_iterator> rangeT;
2191             FunctionImpl<Q,NDIM>* other;
2192             T alpha;
2193             R beta;
2194             do_merge_trees() {}
2195             do_merge_trees(const T alpha, const R beta, FunctionImpl<Q,NDIM>& other)
2196                 : other(&other), alpha(alpha), beta(beta) {}
2197 
2198             /// merge this node, scaled by alpha, into the corresponding node of other's tree (scaled by beta)
2199             bool operator()(typename rangeT::iterator& it) const {
2200 
2201                 const keyT& key = it->first;
2202                 const nodeT& fnode = it->second;
2203 
2204                 // if other's node exists: add this' coeffs to it
2205                 // otherwise insert this' node into other's tree
2206                 typename dcT::accessor acc;
2207                 if (other->get_coeffs().find(acc,key)) {
2208                     nodeT& gnode=acc->second;
2209                     gnode.gaxpy_inplace(beta,fnode,alpha);
2210                 } else {
2211                     nodeT gnode=fnode;
2212                     gnode.scale(alpha);
2213                     other->get_coeffs().replace(key,gnode);
2214                 }
2215                 return true;
2216             }
2217 
2218             template <typename Archive> void serialize(const Archive& ar) {
2219                 MADNESS_EXCEPTION("no serialization of do_merge_trees",1);
2220             }
2221         };
2222 
2223 
2224         /// map this on f
2225         struct do_mapdim {
2226             typedef Range<typename dcT::iterator> rangeT;
2227 
2228             std::vector<long> map;
2229             implT* f;
2230 
2231             do_mapdim() : f(0) {}
2232             do_mapdim(const std::vector<long> map, implT& f) : map(map), f(&f) {}
2233 
2234             bool operator()(typename rangeT::iterator& it) const {
2235 
2236                 const keyT& key = it->first;
2237                 const nodeT& node = it->second;
2238 
2239                 Vector<Translation,NDIM> l;
2240                 for (std::size_t i=0; i<NDIM; ++i) l[map[i]] = key.translation()[i];
2241                 tensorT c = node.coeff().full_tensor_copy();
2242                 if (c.size()) c = copy(c.mapdim(map));
2243                 coeffT cc(c,f->get_tensor_args());
2244                 f->get_coeffs().replace(keyT(key.level(),l), nodeT(cc,node.has_children()));
2245 
2246                 return true;
2247             }
2248             template <typename Archive> void serialize(const Archive& ar) {
2249                 MADNESS_EXCEPTION("no serialization of do_mapdim",1);
2250             }
2251 
2252         };
2253 
2254         /// "put" this on g
2255         struct do_average {
2256             typedef Range<typename dcT::const_iterator> rangeT;
2257 
2258             implT* g;
2259 
2260             do_average() {}
2261             do_average(implT& g) : g(&g) {}
2262 
2263             /// iterator it points to this
2264             bool operator()(typename rangeT::iterator& it) const {
2265 
2266                 const keyT& key = it->first;
2267                 const nodeT& fnode = it->second;
2268 
2269                 // fast return if rhs has no coeff here
2270                 if (fnode.has_coeff()) {
2271 
2272                     // check if there is a node already existing
2273                     typename dcT::accessor acc;
2274                     if (g->get_coeffs().find(acc,key)) {
2275                         nodeT& gnode=acc->second;
2276                         if (gnode.has_coeff()) gnode.coeff()+=fnode.coeff();
2277                     } else {
2278                         g->get_coeffs().replace(key,fnode);
2279                     }
2280                 }
2281 
2282                 return true;
2283             }
2284             template <typename Archive> void serialize(const Archive& ar) {}
2285         };
2286 
2287         /// change representation of nodes' coeffs to low rank, optional fence
2288         struct do_change_tensor_type {
2289             typedef Range<typename dcT::iterator> rangeT;
2290 
2291             // threshold for rank reduction / SVD truncation
2292             TensorArgs targs;
2293 
2294             // constructor takes target precision
2295             do_change_tensor_type() {}
2296             do_change_tensor_type(const TensorArgs& targs) : targs(targs) {}
2297 
2298             //
2299             bool operator()(typename rangeT::iterator& it) const {
2300 
2301                 nodeT& node = it->second;
2302                 change_tensor_type(node.coeff(),targs);
2303                 return true;
2304 
2305             }
2306             template <typename Archive> void serialize(const Archive& ar) {}
2307         };
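        // A minimal dispatch sketch for the do_xxx functors in this class: hand a Range
        // over the local coefficient container to the task queue, which applies the functor
        // to every local node (shown here for do_change_tensor_type; the member function
        // that normally wraps this pattern lives elsewhere in this class):
        //
        //   typedef Range<typename dcT::iterator> rangeT;
        //   TensorArgs new_args(thresh, TT_2D);                   // target representation
        //   world.taskq.for_each<rangeT,do_change_tensor_type>(
        //           rangeT(coeffs.begin(), coeffs.end()),
        //           do_change_tensor_type(new_args));             // convert each local node
        //   world.gop.fence();                                    // optional synchronization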
2308 
2309         struct do_consolidate_buffer {
2310             typedef Range<typename dcT::iterator> rangeT;
2311 
2312             // threshold for rank reduction / SVD truncation
2313             TensorArgs targs;
2314 
2315             // constructor takes target precision
2316             do_consolidate_buffer() {}
2317             do_consolidate_buffer(const TensorArgs& targs) : targs(targs) {}
2318             bool operator()(typename rangeT::iterator& it) const {
2319                 it->second.consolidate_buffer(targs);
2320                 return true;
2321             }
2322             template <typename Archive> void serialize(const Archive& ar) {}
2323         };
2324 
2325 
2326 
2327         template <typename opT>
2328         struct do_unary_op_value_inplace {
2329             typedef Range<typename dcT::iterator> rangeT;
2330             implT* impl;
2331             opT op;
2332             do_unary_op_value_inplace(implT* impl, const opT& op) : impl(impl), op(op) {}
2333             bool operator()(typename rangeT::iterator& it) const {
2334                 const keyT& key = it->first;
2335                 nodeT& node = it->second;
2336                 if (node.has_coeff()) {
2337                     const TensorArgs full_args(-1.0,TT_FULL);
2338                     change_tensor_type(node.coeff(),full_args);
2339                     tensorT& t= node.coeff().full_tensor();
2340                     //double before = t.normf();
2341                     tensorT values = impl->fcube_for_mul(key, key, t);
2342                     op(key, values);
2343                     double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
2344                     t = transform(values,impl->cdata.quad_phiw).scale(scale);
2345                     node.coeff()=coeffT(t,impl->get_tensor_args());
2346                     //double after = t.normf();
2347                     //madness::print("XOP:", key, before, after);
2348                 }
2349                 return true;
2350             }
2351             template <typename Archive> void serialize(const Archive& ar) {}
2352         };
2353 
2354         template <typename Q, typename R>
2355         /// @todo I don't know what this does other than a transform
2356         void vtransform_doit(const std::shared_ptr< FunctionImpl<R,NDIM> >& right,
2357                              const Tensor<Q>& c,
2358                              const std::vector< std::shared_ptr< FunctionImpl<T,NDIM> > >& vleft,
2359                              double tol) {
2360             // To reduce crunch on vectors being transformed each task
2361             // does them in a random order
2362             std::vector<unsigned int> ind(vleft.size());
2363             for (unsigned int i=0; i<vleft.size(); ++i) {
2364                 ind[i] = i;
2365             }
2366             for (unsigned int i=0; i<vleft.size(); ++i) {
2367                 unsigned int j = RandomValue<int>()%vleft.size();
2368                 std::swap(ind[i],ind[j]);
2369             }
2370 
2371             typename FunctionImpl<R,NDIM>::dcT::const_iterator end = right->coeffs.end();
2372             for (typename FunctionImpl<R,NDIM>::dcT::const_iterator it=right->coeffs.begin(); it != end; ++it) {
2373                 if (it->second.has_coeff()) {
2374                     const Key<NDIM>& key = it->first;
2375                     const GenTensor<R>& r = it->second.coeff();
2376                     double norm = r.normf();
2377                     double keytol = truncate_tol(tol,key);
2378 
2379                     for (unsigned int j=0; j<vleft.size(); ++j) {
2380                         unsigned int i = ind[j]; // Random permutation
2381                         if (std::abs(norm*c(i)) > keytol) {
2382                             implT* left = vleft[i].get();
2383                             typename dcT::accessor acc;
2384                             bool newnode = left->coeffs.insert(acc,key);
2385                             if (newnode && key.level()>0) {
2386                                 Key<NDIM> parent = key.parent();
2387 				if (left->coeffs.is_local(parent))
2388 				  left->coeffs.send(parent, &nodeT::set_has_children_recursive, left->coeffs, parent);
2389 				else
2390 				  left->coeffs.task(parent, &nodeT::set_has_children_recursive, left->coeffs, parent);
2391 
2392                             }
2393                             nodeT& node = acc->second;
2394                             if (!node.has_coeff())
2395                                 node.set_coeff(coeffT(cdata.v2k,targs));
2396                             coeffT& t = node.coeff();
2397                             t.gaxpy(1.0, r, c(i));
2398                         }
2399                     }
2400                 }
2401             }
2402         }
2403 
2404         /// Refine multiple functions down to the same finest level
2405 
2406         /// @param v the vector of functions we are refining.
2407         /// @param key the current node.
2408         /// @param c the vector of coefficients passed from above.
2409         void refine_to_common_level(const std::vector<FunctionImpl<T,NDIM>*>& v,
2410                                     const std::vector<tensorT>& c,
2411                                     const keyT key);
2412 
2413         /// Inplace operate on many functions (impl's) with an operator within a certain box
2414         /// @param[in] key the key of the current function node (box)
2415         /// @param[in] op the operator
2416         /// @param[in] v the vector of function impl's on which to be operated
2417         template <typename opT>
2418         void multiop_values_doit(const keyT& key, const opT& op, const std::vector<implT*>& v) {
2419             std::vector<tensorT> c(v.size());
2420             for (unsigned int i=0; i<v.size(); i++) {
2421                 if (v[i]) {
2422                     coeffT cc = coeffs2values(key, v[i]->coeffs.find(key).get()->second.coeff());
2423                     c[i]=cc.full_tensor();
2424                 }
2425             }
2426             tensorT r = op(key, c);
2427             coeffs.replace(key, nodeT(coeffT(values2coeffs(key, r),targs),false));
2428         }
2429 
2430         /// Inplace operate on many functions (impl's) with an operator within a certain box
2431         /// Assumes all functions have been refined down to the same level
2432         /// @param[in] op the operator
2433         /// @param[in] v the vector of function impl's on which to be operated
2434         template <typename opT>
2435         void multiop_values(const opT& op, const std::vector<implT*>& v) {
2436             // rough check on refinement level (ignore non-initialized functions)
2437             for (std::size_t i=1; i<v.size(); ++i) {
2438                 if (v[i] and v[i-1]) {
2439                     MADNESS_ASSERT(v[i]->coeffs.size()==v[i-1]->coeffs.size());
2440                 }
2441             }
2442             typename dcT::iterator end = v[0]->coeffs.end();
2443             for (typename dcT::iterator it=v[0]->coeffs.begin(); it!=end; ++it) {
2444                 const keyT& key = it->first;
2445                 if (it->second.has_coeff())
2446                     world.taskq.add(*this, &implT:: template multiop_values_doit<opT>, key, op, v);
2447                 else
2448                     coeffs.replace(key, nodeT(coeffT(),true));
2449             }
2450             world.gop.fence();
2451         }
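        // Example op for multiop_values() above: pointwise sum of the values of several
        // input functions, accumulated into *this.  Illustrative sketch only (sum_values_op
        // is not part of the library); any callable with this signature works, and all
        // impls in v are assumed to have been refined to a common tree structure first:
        //
        //   struct sum_values_op {
        //       Tensor<T> operator()(const Key<NDIM>& key, const std::vector< Tensor<T> >& values) const {
        //           Tensor<T> r;
        //           for (const Tensor<T>& v : values) {           // skip uninitialized inputs
        //               if (v.size()) {
        //                   if (r.size() == 0) r = copy(v);
        //                   else r += v;
        //               }
        //           }
        //           return r;
        //       }
        //   };
        //   result_impl->multiop_values(sum_values_op(), v);      // v: std::vector<implT*>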
2452 
2453         /// Inplace operate on many functions (impl's) with an operator within a certain box
2454 
2455         /// @param[in] key the key of the current function node (box)
2456         /// @param[in] op the operator
2457         /// @param[in] vin the vector of function impl's on which to be operated
2458         /// @param[out] vout the resulting vector of function impl's
2459         template <typename opT>
2460         void multi_to_multi_op_values_doit(const keyT& key, const opT& op,
2461                 const std::vector<implT*>& vin, std::vector<implT*>& vout) {
2462             std::vector<tensorT> c(vin.size());
2463             for (unsigned int i=0; i<vin.size(); i++) {
2464                 if (vin[i]) {
2465                     coeffT cc = coeffs2values(key, vin[i]->coeffs.find(key).get()->second.coeff());
2466                     c[i]=cc.full_tensor();
2467                 }
2468             }
2469             std::vector<tensorT> r = op(key, c);
2470             MADNESS_ASSERT(r.size()==vout.size());
2471             for (std::size_t i=0; i<vout.size(); ++i) {
2472                 vout[i]->coeffs.replace(key, nodeT(coeffT(values2coeffs(key, r[i]),targs),false));
2473             }
2474         }
2475 
2476         /// Inplace operate on many functions (impl's) with an operator within a certain box
2477 
2478         /// Assumes all functions have been refined down to the same level
2479         /// @param[in] op the operator
2480         /// @param[in] vin the vector of function impl's on which to be operated
2481         /// @param[out] vout the resulting vector of function impl's
2482         template <typename opT>
2483         void multi_to_multi_op_values(const opT& op, const std::vector<implT*>& vin,
2484                 std::vector<implT*>& vout, const bool fence=true) {
2485             // rough check on refinement level (ignore non-initialized functions)
2486             for (std::size_t i=1; i<vin.size(); ++i) {
2487                 if (vin[i] and vin[i-1]) {
2488                     MADNESS_ASSERT(vin[i]->coeffs.size()==vin[i-1]->coeffs.size());
2489                 }
2490             }
2491             typename dcT::iterator end = vin[0]->coeffs.end();
2492             for (typename dcT::iterator it=vin[0]->coeffs.begin(); it!=end; ++it) {
2493                 const keyT& key = it->first;
2494                 if (it->second.has_coeff())
2495                     world.taskq.add(*this, &implT:: template multi_to_multi_op_values_doit<opT>,
2496                             key, op, vin, vout);
2497                 else {
2498                     // fill result functions with empty box in this key
2499                     for (implT* it2 : vout) {
2500                         it2->coeffs.replace(key, nodeT(coeffT(),true));
2501                     }
2502                 }
2503             }
2504             if (fence) world.gop.fence();
2505         }
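        // Example op for multi_to_multi_op_values() above: map N input value tensors to M
        // output value tensors.  Illustrative sketch only (split_op is not part of the
        // library); vout must already hold M initialized impls refined like vin:
        //
        //   struct split_op {
        //       std::vector< Tensor<T> > operator()(const Key<NDIM>& key,
        //                                           const std::vector< Tensor<T> >& vin) const {
        //           std::vector< Tensor<T> > vout(2);
        //           vout[0] = 0.5*vin[0];                         // each output receives half
        //           vout[1] = 0.5*vin[0];                         // of the first input's values
        //           return vout;
        //       }
        //   };
        //   this->multi_to_multi_op_values(split_op(), vin, vout, /*fence=*/true);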
2506 
2507         /// Transforms a vector of functions left[i] = sum[j] right[j]*c[j,i] using sparsity
2508         /// @param[in] vright vector of functions (impl's) on which to be transformed
2509         /// @param[in] c the tensor (matrix) transformer
2510         /// @param[in] vleft vector of the *newly* transformed functions (impl's)
2511         template <typename Q, typename R>
2512         void vtransform(const std::vector< std::shared_ptr< FunctionImpl<R,NDIM> > >& vright,
2513                         const Tensor<Q>& c,
2514                         const std::vector< std::shared_ptr< FunctionImpl<T,NDIM> > >& vleft,
2515                         double tol,
2516                         bool fence) {
2517             for (unsigned int j=0; j<vright.size(); ++j) {
2518                 world.taskq.add(*this, &implT:: template vtransform_doit<Q,R>, vright[j], copy(c(j,_)), vleft, tol);
2519             }
2520             if (fence)
2521                 world.gop.fence();
2522         }
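        // Usage sketch for vtransform(): form vleft[i] = sum_j vright[j]*c(j,i), screening
        // each contribution against truncate_tol.  Hedged sketch: vright/vleft are assumed
        // to be set up by the caller, and c (e.g. an orbital rotation matrix) has
        // dimensions [vright.size()][vleft.size()]:
        //
        //   Tensor<double> c(vright.size(), vleft.size());        // transformation matrix
        //   // ... fill c ...
        //   impl->vtransform(vright, c, vleft, /*tol=*/thresh*0.1, /*fence=*/true);
        //   // impl: any FunctionImpl of the result type (it supplies the task queue)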
2523 
2524         /// Unary operation applied inplace to the values with optional refinement and fence
2525         /// @param[in] op the unary operator for the values
2526         template <typename opT>
2527         void unary_op_value_inplace(const opT& op, bool fence) {
2528             typedef Range<typename dcT::iterator> rangeT;
2529             typedef do_unary_op_value_inplace<opT> xopT;
2530             world.taskq.for_each<rangeT,xopT>(rangeT(coeffs.begin(), coeffs.end()), xopT(this,op));
2531             if (fence)
2532                 world.gop.fence();
2533         }
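        // Example op for unary_op_value_inplace() above: act directly on the function
        // values in each box.  Illustrative sketch only (square_values_op is not part of
        // the library); the op must modify the value tensor in place and provide a
        // (possibly empty) serialize method:
        //
        //   struct square_values_op {
        //       void operator()(const Key<NDIM>& key, Tensor<T>& values) const {
        //           T* p = values.ptr();
        //           for (long i=0; i<values.size(); ++i) p[i] = p[i]*p[i];   // pointwise square
        //       }
        //       template <typename Archive> void serialize(Archive& ar) {}
        //   };
        //   impl->unary_op_value_inplace(square_values_op(), /*fence=*/true);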
2534 
2535         /// Multiplication assuming same distribution and recursive descent
2536         /// Both left and right functions are in the scaling function basis
2537         /// @param[in] key the key to the current function node (box)
2538         /// @param[in] left the function impl associated with the left function
2539         /// @param[in] lcin the scaling function coefficients associated with the
2540         ///            current box in the left function
2541         /// @param[in] vrightin the vector of function impl's associated with
2542         ///            the vector of right functions
2543         /// @param[in] vrcin the vector scaling function coefficients associated with the
2544         ///            current box in the right functions
2545         /// @param[out] vresultin the vector of resulting functions (impl's)
2546         template <typename L, typename R>
2547         void mulXXveca(const keyT& key,
2548                        const FunctionImpl<L,NDIM>* left, const Tensor<L>& lcin,
2549                        const std::vector<const FunctionImpl<R,NDIM>*> vrightin,
2550                        const std::vector< Tensor<R> >& vrcin,
2551                        const std::vector<FunctionImpl<T,NDIM>*> vresultin,
2552                        double tol) {
2553             typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT;
2554             typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator riterT;
2555 
2556             double lnorm = 1e99;
2557             Tensor<L> lc = lcin;
2558             if (lc.size() == 0) {
2559                 literT it = left->coeffs.find(key).get();
2560                 MADNESS_ASSERT(it != left->coeffs.end());
2561                 lnorm = it->second.get_norm_tree();
2562                 if (it->second.has_coeff())
2563                     lc = it->second.coeff().full_tensor_copy();
2564             }
2565 
2566             // Loop thru RHS functions seeing if anything can be multiplied
2567             std::vector<FunctionImpl<T,NDIM>*> vresult;
2568             std::vector<const FunctionImpl<R,NDIM>*> vright;
2569             std::vector< Tensor<R> > vrc;
2570             vresult.reserve(vrightin.size());
2571             vright.reserve(vrightin.size());
2572             vrc.reserve(vrightin.size());
2573 
2574             for (unsigned int i=0; i<vrightin.size(); ++i) {
2575                 FunctionImpl<T,NDIM>* result = vresultin[i];
2576                 const FunctionImpl<R,NDIM>* right = vrightin[i];
2577                 Tensor<R> rc = vrcin[i];
2578                 double rnorm;
2579                 if (rc.size() == 0) {
2580                     riterT it = right->coeffs.find(key).get();
2581                     MADNESS_ASSERT(it != right->coeffs.end());
2582                     rnorm = it->second.get_norm_tree();
2583                     if (it->second.has_coeff())
2584                         rc = it->second.coeff().full_tensor_copy();
2585                 }
2586                 else {
2587                     rnorm = rc.normf();
2588                 }
2589 
2590                 if (rc.size() && lc.size()) { // Yipee!
2591                     result->task(world.rank(), &implT:: template do_mul<L,R>, key, lc, std::make_pair(key,rc));
2592                 }
2593                 else if (tol && lnorm*rnorm < truncate_tol(tol, key)) {
2594                     result->coeffs.replace(key, nodeT(coeffT(cdata.vk,targs),false)); // Zero leaf
2595                 }
2596                 else {  // Interior node
2597                     result->coeffs.replace(key, nodeT(coeffT(),true));
2598                     vresult.push_back(result);
2599                     vright.push_back(right);
2600                     vrc.push_back(rc);
2601                 }
2602             }
2603 
2604             if (vresult.size()) {
2605                 Tensor<L> lss;
2606                 if (lc.size()) {
2607                     Tensor<L> ld(cdata.v2k);
2608                     ld(cdata.s0) = lc(___);
2609                     lss = left->unfilter(ld);
2610                 }
2611 
2612                 std::vector< Tensor<R> > vrss(vresult.size());
2613                 for (unsigned int i=0; i<vresult.size(); ++i) {
2614                     if (vrc[i].size()) {
2615                         Tensor<R> rd(cdata.v2k);
2616                         rd(cdata.s0) = vrc[i](___);
2617                         vrss[i] = vright[i]->unfilter(rd);
2618                     }
2619                 }
2620 
2621                 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2622                     const keyT& child = kit.key();
2623                     Tensor<L> ll;
2624 
2625                     std::vector<Slice> cp = child_patch(child);
2626 
2627                     if (lc.size())
2628                         ll = copy(lss(cp));
2629 
2630                     std::vector< Tensor<R> > vv(vresult.size());
2631                     for (unsigned int i=0; i<vresult.size(); ++i) {
2632                         if (vrc[i].size())
2633                             vv[i] = copy(vrss[i](cp));
2634                     }
2635 
2636                     woT::task(coeffs.owner(child), &implT:: template mulXXveca<L,R>, child, left, ll, vright, vv, vresult, tol);
2637                 }
2638             }
2639         }
2640 
2641         /// Multiplication using recursive descent and assuming same distribution
2642         /// Both left and right functions are in the scaling function basis
2643         /// @param[in] key the key to the current function node (box)
2644         /// @param[in] left the function impl associated with the left function
2645         /// @param[in] lcin the scaling function coefficients associated with the
2646         ///            current box in the left function
2647         /// @param[in] right the function impl associated with the right function
2648         /// @param[in] rcin the scaling function coefficients associated with the
2649         ///            current box in the right function
2650         template <typename L, typename R>
2651         void mulXXa(const keyT& key,
2652                     const FunctionImpl<L,NDIM>* left, const Tensor<L>& lcin,
2653                     const FunctionImpl<R,NDIM>* right,const Tensor<R>& rcin,
2654                     double tol) {
2655             typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT;
2656             typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator riterT;
2657 
2658             double lnorm=1e99, rnorm=1e99;
2659 
2660             Tensor<L> lc = lcin;
2661             if (lc.size() == 0) {
2662                 literT it = left->coeffs.find(key).get();
2663                 MADNESS_ASSERT(it != left->coeffs.end());
2664                 lnorm = it->second.get_norm_tree();
2665                 if (it->second.has_coeff())
2666                     lc = it->second.coeff().full_tensor_copy();
2667             }
2668 
2669             Tensor<R> rc = rcin;
2670             if (rc.size() == 0) {
2671                 riterT it = right->coeffs.find(key).get();
2672                 MADNESS_ASSERT(it != right->coeffs.end());
2673                 rnorm = it->second.get_norm_tree();
2674                 if (it->second.has_coeff())
2675                     rc = it->second.coeff().full_tensor_copy();
2676             }
2677 
2678             // both nodes are leaf nodes: multiply and return
2679             if (rc.size() && lc.size()) { // Yipee!
2680                 do_mul<L,R>(key, lc, std::make_pair(key,rc));
2681                 return;
2682             }
2683 
2684             if (tol) {
2685                 if (lc.size())
2686                     lnorm = lc.normf(); // Otherwise got from norm tree above
2687                 if (rc.size())
2688                     rnorm = rc.normf();
2689                 if (lnorm*rnorm < truncate_tol(tol, key)) {
2690                     coeffs.replace(key, nodeT(coeffT(cdata.vk,targs),false)); // Zero leaf node
2691                     return;
2692                 }
2693             }
2694 
2695             // Recur down
2696             coeffs.replace(key, nodeT(coeffT(),true)); // Interior node
2697 
2698             Tensor<L> lss;
2699             if (lc.size()) {
2700                 Tensor<L> ld(cdata.v2k);
2701                 ld(cdata.s0) = lc(___);
2702                 lss = left->unfilter(ld);
2703             }
2704 
2705             Tensor<R> rss;
2706             if (rc.size()) {
2707                 Tensor<R> rd(cdata.v2k);
2708                 rd(cdata.s0) = rc(___);
2709                 rss = right->unfilter(rd);
2710             }
2711 
2712             for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2713                 const keyT& child = kit.key();
2714                 Tensor<L> ll;
2715                 Tensor<R> rr;
2716                 if (lc.size())
2717                     ll = copy(lss(child_patch(child)));
2718                 if (rc.size())
2719                     rr = copy(rss(child_patch(child)));
2720 
2721                 woT::task(coeffs.owner(child), &implT:: template mulXXa<L,R>, child, left, ll, right, rr, tol);
2722             }
2723         }
2724 
2725 
2726         /// Binary operation on values using recursive descent and assuming same distribution
2727         /// Both left and right functions are in the scaling function basis
2728         /// @param[in] key the key to the current function node (box)
2729         /// @param[in] left the function impl associated with the left function
2730         /// @param[in] lcin the scaling function coefficients associated with the
2731         ///            current box in the left function
2732         /// @param[in] right the function impl associated with the right function
2733         /// @param[in] rcin the scaling function coefficients associated with the
2734         ///            current box in the right function
2735         /// @param[in] op the binary operator
2736         template <typename L, typename R, typename opT>
2737         void binaryXXa(const keyT& key,
2738                        const FunctionImpl<L,NDIM>* left, const Tensor<L>& lcin,
2739                        const FunctionImpl<R,NDIM>* right,const Tensor<R>& rcin,
2740                        const opT& op) {
2741             typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT;
2742             typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator riterT;
2743 
2744             Tensor<L> lc = lcin;
2745             if (lc.size() == 0) {
2746                 literT it = left->coeffs.find(key).get();
2747                 MADNESS_ASSERT(it != left->coeffs.end());
2748                 if (it->second.has_coeff())
2749                     lc = it->second.coeff().full_tensor_copy();
2750             }
2751 
2752             Tensor<R> rc = rcin;
2753             if (rc.size() == 0) {
2754                 riterT it = right->coeffs.find(key).get();
2755                 MADNESS_ASSERT(it != right->coeffs.end());
2756                 if (it->second.has_coeff())
2757                     rc = it->second.coeff().full_tensor_copy();
2758             }
2759 
2760             if (rc.size() && lc.size()) { // Yipee!
2761                 do_binary_op<L,R>(key, lc, std::make_pair(key,rc), op);
2762                 return;
2763             }
2764 
2765             // Recur down
2766             coeffs.replace(key, nodeT(coeffT(),true)); // Interior node
2767 
2768             Tensor<L> lss;
2769             if (lc.size()) {
2770                 Tensor<L> ld(cdata.v2k);
2771                 ld(cdata.s0) = lc(___);
2772                 lss = left->unfilter(ld);
2773             }
2774 
2775             Tensor<R> rss;
2776             if (rc.size()) {
2777                 Tensor<R> rd(cdata.v2k);
2778                 rd(cdata.s0) = rc(___);
2779                 rss = right->unfilter(rd);
2780             }
2781 
2782             for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2783                 const keyT& child = kit.key();
2784                 Tensor<L> ll;
2785                 Tensor<R> rr;
2786                 if (lc.size())
2787                     ll = copy(lss(child_patch(child)));
2788                 if (rc.size())
2789                     rr = copy(rss(child_patch(child)));
2790 
2791                 woT::task(coeffs.owner(child), &implT:: template binaryXXa<L,R,opT>, child, left, ll, right, rr, op);
2792             }
2793         }
2794 
2795         template <typename Q, typename opT>
2796         struct coeff_value_adaptor {
2797             typedef typename opT::resultT resultT;
2798             const FunctionImpl<Q,NDIM>* impl_func;
2799             opT op;
2800 
2801             coeff_value_adaptor() {};
2802             coeff_value_adaptor(const FunctionImpl<Q,NDIM>* impl_func,
2803                                 const opT& op)
2804                 : impl_func(impl_func), op(op) {}
2805 
2806             Tensor<resultT> operator()(const Key<NDIM>& key, const Tensor<Q>& t) const {
2807                 Tensor<Q> invalues = impl_func->coeffs2values(key, t);
2808 
2809                 Tensor<resultT> outvalues = op(key, invalues);
2810 
2811                 return impl_func->values2coeffs(key, outvalues);
2812             }
2813 
2814             template <typename Archive>
2815             void serialize(Archive& ar) {
2816                 ar & impl_func & op;
2817             }
2818         };
2819 
2820         /// Out of place unary operation on function impl
2821         /// The skeleton algorithm should resemble something like
2822         ///
2823         /// *this = op(*func)
2824         ///
2825         /// @param[in] key the key of the current function node (box)
2826         /// @param[in] func the function impl on which to be operated
2827         /// @param[in] op the unary operator
2828         template <typename Q, typename opT>
2829         void unaryXXa(const keyT& key,
2830                       const FunctionImpl<Q,NDIM>* func, const opT& op) {
2831 
2832             //            const Tensor<Q>& fc = func->coeffs.find(key).get()->second.full_tensor_copy();
2833             const Tensor<Q> fc = func->coeffs.find(key).get()->second.coeff().full_tensor_copy();
2834 
2835             if (fc.size() == 0) {
2836                 // Recur down
2837                 coeffs.replace(key, nodeT(coeffT(),true)); // Interior node
2838                 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2839                     const keyT& child = kit.key();
2840                     woT::task(coeffs.owner(child), &implT:: template unaryXXa<Q,opT>, child, func, op);
2841                 }
2842             }
2843             else {
2844                 tensorT t=op(key,fc);
2845                 coeffs.replace(key, nodeT(coeffT(t,targs),false)); // Leaf node
2846             }
2847         }
2848 
2849         /// Multiplies two functions (impl's) together. Delegates to the mulXXa() method
2850         /// @param[in] left pointer to the left function impl
2851         /// @param[in] right pointer to the right function impl
2852         /// @param[in] tol numerical tolerance
2853         template <typename L, typename R>
2854         void mulXX(const FunctionImpl<L,NDIM>* left, const FunctionImpl<R,NDIM>* right, double tol, bool fence) {
2855             if (world.rank() == coeffs.owner(cdata.key0))
2856                 mulXXa(cdata.key0, left, Tensor<L>(), right, Tensor<R>(), tol);
2857             if (fence)
2858                 world.gop.fence();
2859 
2860             //verify_tree();
2861         }
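        // Usage sketch: pointwise product h = f*g at the impl level.  Hedged: this is
        // normally driven by the Function-level multiplication wrappers, and both operands
        // must be reconstructed, i.e. in the scaling function basis:
        //
        //   h_impl->mulXX(f_impl, g_impl, /*tol=*/thresh, /*fence=*/true);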
2862 
2863         /// Performs binary operation on two functions (impl's). Delegates to the binaryXXa() method
2864         /// @param[in] left pointer to the left function impl
2865         /// @param[in] right pointer to the right function impl
2866         /// @param[in] op the binary operator
2867         template <typename L, typename R, typename opT>
2868         void binaryXX(const FunctionImpl<L,NDIM>* left, const FunctionImpl<R,NDIM>* right,
2869                       const opT& op, bool fence) {
2870             if (world.rank() == coeffs.owner(cdata.key0))
2871                 binaryXXa(cdata.key0, left, Tensor<L>(), right, Tensor<R>(), op);
2872             if (fence)
2873                 world.gop.fence();
2874 
2875             //verify_tree();
2876         }
2877 
2878         /// Performs unary operation on function impl. Delegates to the unaryXXa() method
2879         /// @param[in] func function impl of the operand
2880         /// @param[in] op the unary operator
2881         template <typename Q, typename opT>
2882         void unaryXX(const FunctionImpl<Q,NDIM>* func, const opT& op, bool fence) {
2883             if (world.rank() == coeffs.owner(cdata.key0))
2884                 unaryXXa(cdata.key0, func, op);
2885             if (fence)
2886                 world.gop.fence();
2887 
2888             //verify_tree();
2889         }
2890 
2891         /// Performs unary operation on function impl. Delegates to the unaryXXa() method
2892         /// @param[in] func function impl of the operand
2893         /// @param[in] op the unary operator
2894         template <typename Q, typename opT>
2895         void unaryXXvalues(const FunctionImpl<Q,NDIM>* func, const opT& op, bool fence) {
2896             if (world.rank() == coeffs.owner(cdata.key0))
2897                 unaryXXa(cdata.key0, func, coeff_value_adaptor<Q,opT>(func,op));
2898             if (fence)
2899                 world.gop.fence();
2900 
2901             //verify_tree();
2902         }
2903 
2904         /// Multiplies a function (impl) with a vector of functions (impl's). Delegates to the
2905         /// mulXXveca() method.
2906         /// @param[in] left pointer to the left function impl
2907         /// @param[in] vright vector of pointers to the right function impl's
2908         /// @param[in] tol numerical tolerance
2909         /// @param[out] vresult vector of pointers to the resulting function impl's
2910         template <typename L, typename R>
2911         void mulXXvec(const FunctionImpl<L,NDIM>* left,
2912                       const std::vector<const FunctionImpl<R,NDIM>*>& vright,
2913                       const std::vector<FunctionImpl<T,NDIM>*>& vresult,
2914                       double tol,
2915                       bool fence) {
2916             std::vector< Tensor<R> > vr(vright.size());
2917             if (world.rank() == coeffs.owner(cdata.key0))
2918                 mulXXveca(cdata.key0, left, Tensor<L>(), vright, vr, vresult, tol);
2919             if (fence)
2920                 world.gop.fence();
2921         }
2922 
2923         Future<double> get_norm_tree_recursive(const keyT& key) const;
2924 
2925         mutable long box_leaf[1000];
2926         mutable long box_interior[1000];
2927 
2928         // horrifically non-scalable
2929         void put_in_box(ProcessID from, long nl, long ni) const;
2930 
2931         /// Prints summary of data distribution
2932         void print_info() const;
2933 
2934         /// Verify tree is properly constructed ... global synchronization involved
2935 
2936         /// If an inconsistency is detected, prints a message describing the error and
2937         /// then throws a madness exception.
2938         ///
2939         /// This is a reasonably quick and scalable operation that is
2940         /// useful for debugging and paranoia.
2941         void verify_tree() const;
2942 
2943         /// Walk up the tree returning pair(key,node) for first node with coefficients
2944 
2945         /// Three possibilities.
2946         ///
2947         /// 1) The coeffs are present and returned with the key of the containing node.
2948         ///
2949         /// 2) The coeffs are further up the tree ... the request is forwarded up.
2950         ///
2951         /// 3) The coeffs are further down the tree ... an empty tensor is returned.
2952         ///
2953         /// !! This routine is crying out for an optimization to
2954         /// manage the number of messages being sent ... presently
2955         /// each parent is fetched 2^(n*d) times where n is the no. of
2956         /// levels between the level of evaluation and the parent.
2957         /// Alternatively, reimplement multiply as a downward tree
2958         /// walk and just pass the parent down.  Slightly less
2959         /// parallelism but much less communication.
2960         /// @todo Robert .... help!
2961         void sock_it_to_me(const keyT& key,
2962                            const RemoteReference< FutureImpl< std::pair<keyT,coeffT> > >& ref) const;
2963         /// As above, except
2964         /// 3) The coeffs are constructed from the avg of nodes further down the tree
2965         /// @todo Robert .... help!
2966         void sock_it_to_me_too(const keyT& key,
2967                                const RemoteReference< FutureImpl< std::pair<keyT,coeffT> > >& ref) const;
2968 
2969         /// @todo help!
2970         void plot_cube_kernel(archive::archive_ptr< Tensor<T> > ptr,
2971                               const keyT& key,
2972                               const coordT& plotlo, const coordT& plothi, const std::vector<long>& npt,
2973                               bool eval_refine) const;
2974 
2975 
2976         /// Evaluate a cube/slice of points ... plotlo and plothi are already in simulation coordinates
2977         /// No communications
2978         /// @param[in] plotlo the coordinate of the starting point
2979         /// @param[in] plothi the coordinate of the ending point
2980         /// @param[in] npt the number of points in each dimension
2981         Tensor<T> eval_plot_cube(const coordT& plotlo,
2982                                  const coordT& plothi,
2983                                  const std::vector<long>& npt,
2984                                  const bool eval_refine = false) const;
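        // Usage sketch (hedged): plotlo/plothi are corners of the plot volume in simulation
        // coordinates, i.e. the user cell mapped to [0,1]^NDIM:
        //
        //   coordT plotlo, plothi;
        //   plotlo.fill(0.0);  plothi.fill(1.0);                  // plot the whole cell
        //   std::vector<long> npt(NDIM, 101);                     // 101 points per dimension
        //   Tensor<T> values = impl->eval_plot_cube(plotlo, plothi, npt);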
2985 
2986 
2987         /// Evaluate function only if point is local returning (true,value); otherwise return (false,0.0)
2988 
2989         /// maxlevel is the maximum depth to search down to --- the max local depth can be
2990         /// computed with max_local_depth();
2991         std::pair<bool,T> eval_local_only(const Vector<double,NDIM>& xin, Level maxlevel) ;
2992 
2993 
2994         /// Evaluate the function at a point in \em simulation coordinates
2995 
2996         /// Only the invoking process will get the result via the
2997         /// remote reference to a future.  Active messages may be sent
2998         /// to other nodes.
2999         void eval(const Vector<double,NDIM>& xin,
3000                   const keyT& keyin,
3001                   const typename Future<T>::remote_refT& ref);
3002 
3003         /// Get the depth of the tree at a point in \em simulation coordinates
3004 
3005         /// Only the invoking process will get the result via the
3006         /// remote reference to a future.  Active messages may be sent
3007         /// to other nodes.
3008         ///
3009         /// This function is a minimally-modified version of eval()
3010         void evaldepthpt(const Vector<double,NDIM>& xin,
3011                          const keyT& keyin,
3012                          const typename Future<Level>::remote_refT& ref);
3013 
3014         /// Get the rank of leaf box of the tree at a point in \em simulation coordinates
3015 
3016         /// Only the invoking process will get the result via the
3017         /// remote reference to a future.  Active messages may be sent
3018         /// to other nodes.
3019         ///
3020         /// This function is a minimally-modified version of eval()
3021         void evalR(const Vector<double,NDIM>& xin,
3022                    const keyT& keyin,
3023                    const typename Future<long>::remote_refT& ref);
3024 
3025 
3026         /// Computes norm of low/high-order polyn. coeffs for autorefinement test
3027 
3028         /// t is a k^d tensor.  In order to screen the autorefinement
3029         /// during multiplication compute the norms of
3030         /// ... lo ... the block of t for all polynomials of order < k/2
3031         /// ... hi ... the block of t for all polynomials of order >= k/2
3032         ///
3033         /// k=5   0,1,2,3,4     --> 0,1,2 ... 3,4
3034         /// k=6   0,1,2,3,4,5   --> 0,1,2 ... 3,4,5
3035         ///
3036         /// k=number of wavelets, so k=5 means max order is 4, so max exactly
3037         /// representable squarable polynomial is of order 2.
3038         void tnorm(const tensorT& t, double* lo, double* hi) const;
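        // Conceptual 1d illustration of the lo/hi split (the real tnorm acts on the full
        // k^NDIM coefficient tensor and is implemented elsewhere); cut = (k+1)/2 reproduces
        // the k=5 and k=6 examples above:
        //
        //   // Tensor<double> t(k);                               // coefficients of order 0..k-1
        //   long cut = (k+1)/2;                                   // k=5 -> 3, k=6 -> 3
        //   double lo = t(Slice(0,cut-1)).normf();                // low-order block
        //   double hi = t(Slice(cut,k-1)).normf();                // remaining high-order block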
3039 
3040         // This is invoked if the node has not been autorefined
3041         void do_square_inplace(const keyT& key);
3042 
3043         // This is invoked if the node has been autorefined
3044         void do_square_inplace2(const keyT& parent, const keyT& child, const tensorT& parent_coeff);
3045 
3046         /// Always returns false (for when autorefine is not wanted)
3047         bool noautorefine(const keyT& key, const tensorT& t) const;
3048 
3049         /// Returns true if this block of coeffs needs autorefining
3050         bool autorefine_square_test(const keyT& key, const nodeT& t) const;
3051 
3052         /// Pointwise squaring of function with optional global fence
3053 
3054         /// If not autorefining, local computation only if not fencing.
3055         /// If autorefining, may result in asynchronous communication.
3056         void square_inplace(bool fence);
3057         void abs_inplace(bool fence);
3058         void abs_square_inplace(bool fence);
3059 
3060         /// is this the same as trickle_down() ?
3061         void sum_down_spawn(const keyT& key, const coeffT& s);
3062 
3063         /// After 1d push operator must sum coeffs down the tree to restore correct scaling function coefficients
3064         void sum_down(bool fence);
3065 
3066         /// perform this multiplication: h(1,2) = f(1,2) * g(1)
3067         template<size_t LDIM>
3068         struct multiply_op {
3069 
3070             static bool randomize() {return false;}
3071             typedef CoeffTracker<T,NDIM> ctT;
3072             typedef CoeffTracker<T,LDIM> ctL;
3073             typedef multiply_op<LDIM> this_type;
3074 
3075             implT* h;     ///< the result function h(1,2) = f(1,2) * g(1)
3076             ctT f;
3077             ctL g;
3078             int particle;       ///< if g is g(1) or g(2)
3079 
3080             multiply_op() : particle(1) {}
3081 
3082             multiply_op(implT* h, const ctT& f, const ctL& g, const int particle)
3083                 : h(h), f(f), g(g), particle(particle) {};
3084 
3085             /// return true if this will be a leaf node
3086 
3087             /// use generalization of tnorm for a GenTensor
3088             bool screen(const coeffT& fcoeff, const coeffT& gcoeff, const keyT& key) const {
3089                 double glo=0.0, ghi=0.0, flo=0.0, fhi=0.0;
3090                 MADNESS_ASSERT(gcoeff.tensor_type()==TT_FULL);
3091                 g.get_impl()->tnorm(gcoeff.full_tensor(), &glo, &ghi);
3092 
3093                 // this assumes intimate knowledge of how a GenTensor is organized!
3094                 MADNESS_ASSERT(fcoeff.tensor_type()==TT_2D);
3095                 const long rank=fcoeff.rank();
3096                 const long maxk=fcoeff.dim(0);
3097                 tensorT vec=fcoeff.config().ref_vector(particle-1).reshape(rank,maxk,maxk,maxk);
3098                 for (long i=0; i<rank; ++i) {
3099                     double lo,hi;
3100                     tensorT c=vec(Slice(i,i),_,_,_).reshape(maxk,maxk,maxk);
3101                     g.get_impl()->tnorm(c, &lo, &hi);        // note we use g instead of h, since g is 3D
3102                     flo+=lo*fcoeff.config().weights(i);
3103                     fhi+=hi*fcoeff.config().weights(i);
3104                 }
3105                 double total_hi=glo*fhi + ghi*flo + fhi*ghi;
3106                 return (total_hi<h->truncate_tol(h->get_thresh(),key));
3107 
3108             }
3109 
3110             /// apply this on a FunctionNode of f and g of Key key
3111 
3112             /// @param[in]  key key for FunctionNode in f and g, (g: broken into particles)
3113             /// @return <this node is a leaf, coefficients of this node>
3114             std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const {
3115 
3116                 //                bool is_leaf=(not fdatum.second.has_children());
3117                 //                if (not is_leaf) return std::pair<bool,coeffT> (is_leaf,coeffT());
3118 
3119                 // break key into particles (these are the child keys, with f/gdatum come the parent keys)
3120                 Key<LDIM> key1,key2;
3121                 key.break_apart(key1,key2);
3122                 const Key<LDIM> gkey= (particle==1) ? key1 : key2;
3123 
3124                 // get coefficients of the actual FunctionNode
3125                 coeffT coeff1=f.get_impl()->parent_to_child(f.coeff(),f.key(),key);
3126                 coeff1.normalize();
3127                 const coeffT coeff2=g.get_impl()->parent_to_child(g.coeff(),g.key(),gkey);
3128 
3129                 // multiplication is done in TT_2D
3130                 coeffT coeff1_2D=coeff1.convert(TensorArgs(h->get_thresh(),TT_2D));
3131                 coeff1_2D.normalize();
3132 
3133                 bool is_leaf=screen(coeff1_2D,coeff2,key);
3134                 if (key.level()<2) is_leaf=false;
3135 
3136                 coeffT hcoeff;
3137                 if (is_leaf) {
3138 
3139                     // convert coefficients to values
3140                     coeffT hvalues=f.get_impl()->coeffs2values(key,coeff1_2D);
3141                     coeffT gvalues=g.get_impl()->coeffs2values(gkey,coeff2);
3142 
3143                     // perform multiplication
3144                     coeffT result_val=h->multiply(hvalues,gvalues,particle-1);
3145 
3146                     hcoeff=h->values2coeffs(key,result_val);
3147 
3148                     // conversion on coeffs, not on values, because it implies truncation!
3149                     if (hcoeff.tensor_type()!=h->get_tensor_type())
3150                         hcoeff=hcoeff.convert(h->get_tensor_args());
3151                 }
3152 
3153                 return std::pair<bool,coeffT> (is_leaf,hcoeff);
3154             }
3155 
3156             this_type make_child(const keyT& child) const {
3157 
3158                 // break key into particles
3159                 Key<LDIM> key1, key2;
3160                 child.break_apart(key1,key2);
3161                 const Key<LDIM> gkey= (particle==1) ? key1 : key2;
3162 
3163                 return this_type(h,f.make_child(child),g.make_child(gkey),particle);
3164             }
3165 
3166             Future<this_type> activate() const {
3167             	Future<ctT> f1=f.activate();
3168             	Future<ctL> g1=g.activate();
3169                 return h->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
3170                                           &this_type::forward_ctor),h,f1,g1,particle);
3171             }
3172 
3173             this_type forward_ctor(implT* h1, const ctT& f1, const ctL& g1, const int particle) {
3174             	return this_type(h1,f1,g1,particle);
3175             }
3176 
3177             template <typename Archive> void serialize(const Archive& ar) {
3178                 ar & h & f & g;
3179             }
3180         };
3181 
3182 
3183         /// add two functions f and g: result=alpha * f  +  beta * g
3184         struct add_op {
3185 
3186             typedef CoeffTracker<T,NDIM> ctT;
3187             typedef add_op this_type;
3188 
3189             bool randomize() const {return false;}
3190 
3191             /// tracking coeffs of first and second addend
3192             ctT f,g;
3193             /// prefactor for f, g
3194             double alpha, beta;
3195 
3196             add_op() {};
3197             add_op(const ctT& f, const ctT& g, const double alpha, const double beta)
3198                 : f(f), g(g), alpha(alpha), beta(beta){}
3199 
3200             /// if we are at the bottom of the trees, return the sum of the coeffs
3201             std::pair<bool,coeffT> operator()(const keyT& key) const {
3202 
3203                 bool is_leaf=(f.is_leaf() and g.is_leaf());
3204                 if (not is_leaf) return std::pair<bool,coeffT> (is_leaf,coeffT());
3205 
3206                 coeffT fcoeff=f.get_impl()->parent_to_child(f.coeff(),f.key(),key);
3207                 coeffT gcoeff=g.get_impl()->parent_to_child(g.coeff(),g.key(),key);
3208                 coeffT hcoeff=copy(fcoeff);
3209                 hcoeff.gaxpy(alpha,gcoeff,beta);
3210                 hcoeff.reduce_rank(f.get_impl()->get_tensor_args().thresh);
3211                 return std::pair<bool,coeffT> (is_leaf,hcoeff);
3212             }
3213 
3214             this_type make_child(const keyT& child) const {
3215                 return this_type(f.make_child(child),g.make_child(child),alpha,beta);
3216             }
3217 
3218             /// retrieve the coefficients (parent coeffs might be remote)
3219             Future<this_type> activate() const {
3220             	Future<ctT> f1=f.activate();
3221             	Future<ctT> g1=g.activate();
3222                 return f.get_impl()->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
3223                                                      &this_type::forward_ctor),f1,g1,alpha,beta);
3224             }
3225 
3226             /// taskq-compatible ctor
3227             this_type forward_ctor(const ctT& f1, const ctT& g1, const double alpha, const double beta) {
3228             	return this_type(f1,g1,alpha,beta);
3229             }
3230 
3231             template <typename Archive> void serialize(const Archive& ar) {
3232                 ar & f & g & alpha & beta;
3233             }
3234 
3235         };
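        // Usage sketch for add_op: plug it into the forward_traverse/traverse_tree machinery
        // below, mirroring the pattern used in multiply() below.  Hedged sketch: result
        // receives alpha*f + beta*g, and f_impl/g_impl are assumed to be reconstructed:
        //
        //   CoeffTracker<T,NDIM> ff(f_impl), gg(g_impl);
        //   add_op coeff_op(ff, gg, alpha, beta);
        //   insert_op<T,NDIM> apply_op(result);
        //   if (result->world.rank() == result->coeffs.owner(result->cdata.key0))
        //       result->task(result->coeffs.owner(result->cdata.key0),
        //                    &implT:: template forward_traverse<add_op, insert_op<T,NDIM> >,
        //                    coeff_op, apply_op, result->cdata.key0);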
3236 
3237         /// multiply f (a pair function of NDIM) with an orbital g (LDIM=NDIM/2)
3238 
3239         /// as in (with h(1,2)=*this) : h(1,2) = g(1) * f(1,2)
3240         /// use tnorm as a measure to determine if f (=*this) must be refined
3241         /// @param[in]  f           the NDIM function f=f(1,2)
3242         /// @param[in]  g           the LDIM function g(1) (or g(2))
3243         /// @param[in]  particle    1 or 2, as in g(1) or g(2)
3244         template<size_t LDIM>
3245         void multiply(const implT* f, const FunctionImpl<T,LDIM>* g, const int particle) {
3246 
3247             keyT key0=f->cdata.key0;
3248 
3249             if (world.rank() == coeffs.owner(key0)) {
3250 
3251                 CoeffTracker<T,NDIM> ff(f);
3252                 CoeffTracker<T,LDIM> gg(g);
3253 
3254                 typedef multiply_op<LDIM> coeff_opT;
3255                 coeff_opT coeff_op(this,ff,gg,particle);
3256 
3257                 typedef insert_op<T,NDIM> apply_opT;
3258                 apply_opT apply_op(this);
3259 
3260                 ProcessID p= coeffs.owner(key0);
3261                 woT::task(p, &implT:: template forward_traverse<coeff_opT,apply_opT>, coeff_op, apply_op, key0);
3262             }
3263 
3264             this->compressed=false;
3265         }
3266 
3267         /// Hartree product of two LDIM functions to yield a NDIM = 2*LDIM function
3268         template<size_t LDIM, typename leaf_opT>
3269         struct hartree_op {
3270             bool randomize() const {return false;}
3271 
3272             typedef hartree_op<LDIM,leaf_opT> this_type;
3273             typedef CoeffTracker<T,LDIM> ctL;
3274 
3275             implT* result; 	    ///< where to construct the pair function
3276             ctL p1, p2;			///< tracking coeffs of the two lo-dim functions
3277             leaf_opT leaf_op;   ///< determine if a given node will be a leaf node
3278 
3279             // ctor
3280             hartree_op() {}
3281             hartree_op(implT* result, const ctL& p11, const ctL& p22, const leaf_opT& leaf_op)
3282                 : result(result), p1(p11), p2(p22), leaf_op(leaf_op) {
3283                 MADNESS_ASSERT(LDIM+LDIM==NDIM);
3284             }
3285 
3286             std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const {
3287 
3288                 // break key into particles (these are the child keys, with datum1/2 come the parent keys)
3289                 Key<LDIM> key1,key2;
3290                 key.break_apart(key1,key2);
3291 
3292                 // this returns the appropriate NS coeffs for key1 and key2 resp.
3293             	const coeffT fcoeff=p1.coeff(key1);
3294                 const coeffT gcoeff=p2.coeff(key2);
3295                 bool is_leaf=leaf_op(key,fcoeff.full_tensor(),gcoeff.full_tensor());
3296                 if (not is_leaf) return std::pair<bool,coeffT> (is_leaf,coeffT());
3297 
3298                 // extract the sum coeffs from the NS coeffs
3299                 const coeffT s1=fcoeff(p1.get_impl()->cdata.s0);
3300                 const coeffT s2=gcoeff(p2.get_impl()->cdata.s0);
3301 
3302                 // new coeffs are simply the hartree/kronecker/outer product --
3303                 coeffT coeff=outer(s1,s2,result->get_tensor_args());
3304                 // no post-determination
3305                 //                is_leaf=leaf_op(key,coeff);
3306                 return std::pair<bool,coeffT>(is_leaf,coeff);
3307             }
3308 
3309             this_type make_child(const keyT& child) const {
3310 
3311                 // break key into particles
3312                 Key<LDIM> key1, key2;
3313                 child.break_apart(key1,key2);
3314 
3315                 return this_type(result,p1.make_child(key1),p2.make_child(key2),leaf_op);
3316             }
3317 
3318             Future<this_type> activate() const {
3319             	Future<ctL> p11=p1.activate();
3320             	Future<ctL> p22=p2.activate();
3321                 return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
3322                                                &this_type::forward_ctor),result,p11,p22,leaf_op);
3323             }
3324 
3325             this_type forward_ctor(implT* result1, const ctL& p11, const ctL& p22, const leaf_opT& leaf_op) {
3326             	return this_type(result1,p11,p22,leaf_op);
3327             }
3328 
3329             template <typename Archive> void serialize(const Archive& ar) {
3330                 ar & result & p1 & p2 & leaf_op;
3331             }
3332         };
3333 
3334         /// traverse a non-existing tree
3335 
3336         /// part II: activate coeff_op, i.e. retrieve all the necessary remote boxes (communication)
3337         /// @param[in]	coeff_op	operator making the coefficients that needs activation
3338         /// @param[in]	apply_op	just passing thru
3339         /// @param[in]	key			the key we are working on
3340         template<typename coeff_opT, typename apply_opT>
3341         void forward_traverse(const coeff_opT& coeff_op, const apply_opT& apply_op, const keyT& key) const {
3342             MADNESS_ASSERT(coeffs.is_local(key));
3343             Future<coeff_opT> active_coeff=coeff_op.activate();
3344             woT::task(world.rank(), &implT:: template traverse_tree<coeff_opT,apply_opT>, active_coeff, apply_op, key);
3345         }
3346 
3347 
3348         /// traverse a non-existing tree
3349 
3350         /// part I: make the coefficients, process them and continue the recursion if necessary
3351         /// @param[in]	coeff_op	operator making the coefficients and determining them being leaves
3352         /// @param[in]	apply_op	operator processing the coefficients
3353         /// @param[in]	key			the key we are currently working on
3354         template<typename coeff_opT, typename apply_opT>
3355         void traverse_tree(const coeff_opT& coeff_op, const apply_opT& apply_op, const keyT& key) const {
3356             MADNESS_ASSERT(coeffs.is_local(key));
3357 
3358             typedef typename std::pair<bool,coeffT> argT;
3359             const argT arg=coeff_op(key);
3360             apply_op.operator()(key,arg.second,arg.first);
3361 
3362             const bool has_children=(not arg.first);
3363             if (has_children) {
3364                 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
3365                     const keyT& child=kit.key();
3366                     coeff_opT child_op=coeff_op.make_child(child);
3367                     // spawn activation where child is local
3368                     ProcessID p=coeffs.owner(child);
3369 
3370                     void (implT::*ft)(const coeff_opT&, const apply_opT&, const keyT&) const = &implT::forward_traverse<coeff_opT,apply_opT>;
3371 
3372                     woT::task(p, ft, child_op, apply_op, child);
3373                 }
3374             }
3375         }
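        /// The following standalone sketch (illustration only, not MADNESS code; the
        /// toy_* names are invented for this example) mirrors the pattern that
        /// traverse_tree implements: make the coefficients for a box, decide whether
        /// it is a leaf, and if not recur into the children. In the real code the
        /// recursion is spawned as tasks on the processes owning the child keys.
        /// \code
        /// #include <cstdio>
        /// #include <utility>
        ///
        /// // toy stand-in for coeff_opT: the "coefficient" is the box midpoint,
        /// // and a box is a leaf once it is shorter than a tolerance
        /// struct ToyCoeffOp {
        ///     double tol;
        ///     std::pair<bool,double> operator()(double a, double b) const {
        ///         return {b - a < tol, 0.5*(a + b)};
        ///     }
        /// };
        ///
        /// void toy_traverse(const ToyCoeffOp& op, double a, double b) {
        ///     const auto arg = op(a, b);   // make coeffs, decide leaf (apply_op would process them here)
        ///     std::printf("box [%g,%g] coeff %g leaf %d\n", a, b, arg.second, int(arg.first));
        ///     if (!arg.first) {            // not a leaf: recur into the two children
        ///         const double m = 0.5*(a + b);
        ///         toy_traverse(op, a, m);
        ///         toy_traverse(op, m, b);
        ///     }
        /// }
        ///
        /// int main() { toy_traverse(ToyCoeffOp{0.3}, 0.0, 1.0); }
        /// \endcode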
3376 
3377 
3378         /// given two functions of LDIM, perform the Hartree/Kronecker/outer product
3379 
3380         /// |Phi(1,2)> = |phi(1)> x |phi(2)>
3381         /// @param[in]	p1	FunctionImpl of particle 1
3382         /// @param[in]	p2	FunctionImpl of particle 2
        /// @param[in]	leaf_op	operator determining whether a given box will be a leaf
3384         template<std::size_t LDIM, typename leaf_opT>
        void hartree_product(const FunctionImpl<T,LDIM>* p1, const FunctionImpl<T,LDIM>* p2,
3386                              const leaf_opT& leaf_op, bool fence) {
3387             MADNESS_ASSERT(p1->is_nonstandard());
3388             MADNESS_ASSERT(p2->is_nonstandard());
3389 
3390             const keyT key0=cdata.key0;
3391 
3392             if (world.rank() == this->get_coeffs().owner(key0)) {
3393 
3394                 // prepare the CoeffTracker
3395                 CoeffTracker<T,LDIM> iap1(p1);
3396                 CoeffTracker<T,LDIM> iap2(p2);
3397 
3398                 // the operator making the coefficients
3399                 typedef hartree_op<LDIM,leaf_opT> coeff_opT;
3400                 coeff_opT coeff_op(this,iap1,iap2,leaf_op);
3401 
3402                 // this operator simply inserts the coeffs into this' tree
3403                 typedef insert_op<T,NDIM> apply_opT;
3404                 apply_opT apply_op(this);
3405 
3406                 woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>,
3407                           coeff_op, apply_op, cdata.key0);
3408 
3409             }
3410 
3411             this->compressed=false;
3412             if (fence) world.gop.fence();
3413         }
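        /// Illustration of the Hartree/Kronecker product used above (a standalone
        /// sketch with invented toy_* names, not the MADNESS API): for separable
        /// scaling coefficients the product function has coefficients
        /// s_ij(1,2) = s_i(1)*s_j(2), i.e. an outer product of the two blocks,
        /// the toy analogue of the outer(...) calls used by the coefficient operators
        /// in this class.
        /// \code
        /// #include <cstddef>
        /// #include <vector>
        ///
        /// // c12[i*n2+j] = c1[i]*c2[j]
        /// std::vector<double> toy_outer(const std::vector<double>& c1,
        ///                               const std::vector<double>& c2) {
        ///     std::vector<double> c12(c1.size()*c2.size());
        ///     for (std::size_t i=0; i<c1.size(); ++i)
        ///         for (std::size_t j=0; j<c2.size(); ++j)
        ///             c12[i*c2.size()+j] = c1[i]*c2[j];
        ///     return c12;
        /// }
        ///
        /// int main() {
        ///     std::vector<double> c12 = toy_outer({1.0, 2.0}, {3.0, 4.0});
        ///     // c12 == {3, 4, 6, 8}
        /// }
        /// \endcode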
3414 
3415 
3416         template <typename opT, typename R>
3417         void
        apply_1d_realspace_push_op(const archive::archive_ptr<const opT>& pop, int axis, const keyT& key, const Tensor<R>& c) {
3419             const opT* op = pop.ptr;
3420             const Level n = key.level();
3421             const double cnorm = c.normf();
3422             const double tol = truncate_tol(thresh, key)*0.1; // ??? why this value????
3423 
3424             Vector<Translation,NDIM> lnew(key.translation());
3425             const Translation lold = lnew[axis];
3426             const Translation maxs = Translation(1)<<n;
3427 
3428             int nsmall = 0; // Counts neglected blocks to terminate s loop
3429             for (Translation s=0; s<maxs; ++s) {
3430                 int maxdir = s ? 1 : -1;
3431                 for (int direction=-1; direction<=maxdir; direction+=2) {
3432                     lnew[axis] = lold + direction*s;
3433                     if (lnew[axis] >= 0 && lnew[axis] < maxs) { // NON-ZERO BOUNDARY CONDITIONS IGNORED HERE !!!!!!!!!!!!!!!!!!!!
3434                         const Tensor<typename opT::opT>& r = op->rnlij(n, s*direction, true);
3435                         double Rnorm = r.normf();
3436 
3437                         if (Rnorm == 0.0) {
3438                             return; // Hard zero means finished!
3439                         }
3440 
3441                         if (s <= 1  ||  r.normf()*cnorm > tol) { // Always do kernel and neighbor
3442                             nsmall = 0;
3443                             tensorT result = transform_dir(c,r,axis);
3444 
3445                             if (result.normf() > tol*0.3) {
3446                                 Key<NDIM> dest(n,lnew);
3447                                 coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest, TaskAttributes::hipri());
3448                             }
3449                         }
3450                         else {
3451                             ++nsmall;
3452                         }
3453                     }
3454                     else {
3455                         ++nsmall;
3456                     }
3457                 }
3458                 if (nsmall >= 4) {
                    // If we have two negligible blocks in succession
                    // in each direction, interpret this as the
                    // operator being zero beyond this distance
3462                     break;
3463                 }
3464             }
3465         }
3466 
3467         template <typename opT, typename R>
3468         void
        apply_1d_realspace_push(const opT& op, const FunctionImpl<R,NDIM>* f, int axis, bool fence) {
3470             MADNESS_ASSERT(!f->is_compressed());
3471 
3472             typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator fiterT;
3473             typedef FunctionNode<R,NDIM> fnodeT;
3474             fiterT end = f->coeffs.end();
3475             ProcessID me = world.rank();
3476             for (fiterT it=f->coeffs.begin(); it!=end; ++it) {
3477                 const fnodeT& node = it->second;
3478                 if (node.has_coeff()) {
3479                     const keyT& key = it->first;
3480                     const Tensor<R>& c = node.coeff().full_tensor_copy();
3481                     woT::task(me, &implT:: template apply_1d_realspace_push_op<opT,R>,
3482                               archive::archive_ptr<const opT>(&op), axis, key, c);
3483                 }
3484             }
3485             if (fence) world.gop.fence();
3486         }
3487 
3488         void forward_do_diff1(const DerivativeBase<T,NDIM>* D,
3489                               const implT* f,
3490                               const keyT& key,
3491                               const std::pair<keyT,coeffT>& left,
3492                               const std::pair<keyT,coeffT>& center,
3493                               const std::pair<keyT,coeffT>& right);
3494 
3495         void do_diff1(const DerivativeBase<T,NDIM>* D,
3496                       const implT* f,
3497                       const keyT& key,
3498                       const std::pair<keyT,coeffT>& left,
3499                       const std::pair<keyT,coeffT>& center,
3500                       const std::pair<keyT,coeffT>& right);
3501 
3502         // Called by result function to differentiate f
3503         void diff(const DerivativeBase<T,NDIM>* D, const implT* f, bool fence);
3504 
3505         /// Returns key of general neighbor enforcing BC
3506 
3507         /// Out of volume keys are mapped to enforce the BC as follows.
3508         ///   * Periodic BC map back into the volume and return the correct key
3509         ///   * Zero BC - returns invalid() to indicate out of volume
3510         keyT neighbor(const keyT& key, const keyT& disp, const std::vector<bool>& is_periodic) const;
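        /// A minimal sketch of the mapping described above (illustration only;
        /// toy_neighbor is an invented name, not the MADNESS API): translations at
        /// level n live in [0, 2^n); a periodic displacement is wrapped back into
        /// that range, while for zero/free boundary conditions an out-of-volume
        /// translation signals an invalid key.
        /// \code
        /// #include <cstdint>
        /// #include <optional>
        ///
        /// std::optional<std::int64_t> toy_neighbor(std::int64_t l, std::int64_t d,
        ///                                          int n, bool periodic) {
        ///     const std::int64_t two_n = std::int64_t(1) << n;
        ///     std::int64_t lnew = l + d;
        ///     if (periodic) {
        ///         lnew %= two_n;
        ///         if (lnew < 0) lnew += two_n;   // C++ % keeps the sign of the dividend
        ///         return lnew;                   // wrapped back into the volume
        ///     }
        ///     if (lnew < 0 || lnew >= two_n) return std::nullopt;  // out of volume: invalid key
        ///     return lnew;
        /// }
        /// \endcode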
3511 
3512         /// find_me. Called by diff_bdry to get coefficients of boundary function
3513         Future< std::pair<keyT,coeffT> > find_me(const keyT& key) const;
3514 
        /// return a std::pair<key, node>, which MUST exist
3516         std::pair<Key<NDIM>,ShallowNode<T,NDIM> > find_datum(keyT key) const;
3517 
3518         /// multiply the ket with a one-electron potential rr(1,2)= f(1,2)*g(1)
3519 
3520         /// @param[in]	val_ket	function values of f(1,2)
3521         /// @param[in]	val_pot	function values of g(1)
3522         /// @param[in]	particle	if 0 then g(1), if 1 then g(2)
3523         /// @return		the resulting function values
3524         coeffT multiply(const coeffT& val_ket, const coeffT& val_pot, int particle) const;
3525 
3526 
3527         /// given several coefficient tensors, assemble a result tensor
3528 
3529         /// the result looks like: 	(v(1,2) + v(1) + v(2)) |ket(1,2)>
3530         /// or 						(v(1,2) + v(1) + v(2)) |p(1) p(2)>
3531         /// i.e. coefficients for the ket and coefficients for the two particles are
3532         /// mutually exclusive. All potential terms are optional, just pass in empty coeffs.
3533         /// @param[in]	key			the key of the FunctionNode to which these coeffs belong
3534         /// @param[in]	coeff_ket	coefficients of the ket
3535         /// @param[in]	vpotential1	function values of the potential for particle 1
3536         /// @param[in]	vpotential2	function values of the potential for particle 2
3537         /// @param[in]	veri		function values for the 2-particle potential
3538         coeffT assemble_coefficients(const keyT& key, const coeffT& coeff_ket,
3539                                      const coeffT& vpotential1, const coeffT& vpotential2,
3540                                      const tensorT& veri) const;
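        /// A much-simplified value-space sketch of this assembly (illustration only;
        /// toy_assemble is an invented name, and the real member works on low-rank
        /// coefficient tensors, transforming between coefficients and values
        /// internally): on an n1 x n2 grid of quadrature points the result is
        /// (v1(x1) + v2(x2) + v12(x1,x2)) * ket(x1,x2), with absent potential terms
        /// passed as empty vectors.
        /// \code
        /// #include <cstddef>
        /// #include <vector>
        ///
        /// std::vector<double> toy_assemble(std::size_t n1, std::size_t n2,
        ///                                  const std::vector<double>& ket,
        ///                                  const std::vector<double>& v1,
        ///                                  const std::vector<double>& v2,
        ///                                  const std::vector<double>& v12) {
        ///     std::vector<double> r(n1*n2);
        ///     for (std::size_t i=0; i<n1; ++i) {
        ///         for (std::size_t j=0; j<n2; ++j) {
        ///             double pot = 0.0;
        ///             if (!v1.empty())  pot += v1[i];         // one-particle potential v(1)
        ///             if (!v2.empty())  pot += v2[j];         // one-particle potential v(2)
        ///             if (!v12.empty()) pot += v12[i*n2+j];   // two-particle potential v(1,2)
        ///             r[i*n2+j] = pot * ket[i*n2+j];
        ///         }
        ///     }
        ///     return r;
        /// }
        /// \endcode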
3541 
3542         /// given a ket and the 1- and 2-electron potentials, construct the function V phi
3543 
        /// small memory footprint version of Vphi_op: use the NS form to have information
3545         /// about parent and children to determine if a box is a leaf. This will require
3546         /// compression of the constituent functions, which will lead to more memory usage
3547         /// there, but will avoid oversampling of the result function.
3548         template<typename opT, size_t LDIM>
3549         struct Vphi_op_NS {
3550 
          bool randomize() const {return true;}
3552 
3553           typedef Vphi_op_NS<opT,LDIM> this_type;
3554           typedef CoeffTracker<T,NDIM> ctT;
3555           typedef CoeffTracker<T,LDIM> ctL;
3556 
3557           implT* result;  		///< where to construct Vphi, no need to track parents
3558           opT leaf_op;    	    ///< deciding if a given FunctionNode will be a leaf node
3559           ctT iaket;				///< the ket of a pair function (exclusive with p1, p2)
3560           ctL iap1, iap2;			///< the particles 1 and 2 (exclusive with ket)
3561           ctL iav1, iav2;			///< potentials for particles 1 and 2
3562           const implT* eri;		///< 2-particle potential, must be on-demand
3563 
3564           // ctor
          Vphi_op_NS() {}
          Vphi_op_NS(implT* result, const opT& leaf_op, const ctT& iaket,
3567 		     const ctL& iap1, const ctL& iap2, const ctL& iav1, const ctL& iav2,
3568 		     const implT* eri)
3569           : result(result), leaf_op(leaf_op), iaket(iaket), iap1(iap1), iap2(iap2)
3570           , iav1(iav1), iav2(iav2), eri(eri) {
3571 
3572             // 2-particle potential must be on-demand
3573             if (eri) MADNESS_ASSERT(eri->is_on_demand());
3574           }
3575 
3576           /// make and insert the coefficients into result's tree
          std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const {
3578 
3579 
3580             if(leaf_op.do_pre_screening()){
        	// this means we only construct boxes that are leaf boxes of the other function referenced by leaf_op
3582         	if(leaf_op.pre_screening(key)){
3583         	    // construct sum_coefficients, insert them and leave
3584         	    const coeffT sum_coeff=make_sum_coeffs(key);
3585         	    result->get_coeffs().replace(key,nodeT(sum_coeff,false));
3586         	    return std::pair<bool,coeffT> (true,coeffT());
3587         	}else{
3588         	    result->get_coeffs().replace(key,nodeT(coeffT(),true));
3589         	    return continue_recursion(std::vector<bool>(1<<NDIM,false),tensorT(),key);
3590         	}
3591             }else{
3592         	// this means that the function has to be completely constructed and not mirrored by another function
3593 
3594         	// if the initial level is not reached then this must not be a leaf box
3595         	size_t il = result->get_initial_level();
3596         	if(FunctionDefaults<NDIM>::get_refine()) il+=1;
3597         	if(key.level()<il){
3598         	    //std::cout << "n=" +  std::to_string(key.level()) + " below initial level " + std::to_string(result->get_initial_level()) + "\n";
3599         	    // insert empty coeffs for this box and send off jobs for the children
3600         	    result->get_coeffs().replace(key,nodeT(coeffT(),true));
3601         	    return continue_recursion(std::vector<bool>(1<<NDIM,false),tensorT(),key);
3602         	}
3603         	// if further refinement is needed (because we are at a special box, special point)
3604         	// and the special_level is not reached then this must not be a leaf box
3605         	if(key.level()<result->get_special_level() and leaf_op.special_refinement_needed(key)){
3606         	    //std::cout << "special refinement for n=" + std::to_string(key.level()) + "\n";
3607         	    // insert empty coeffs for this box and send off jobs for the children
3608         	    result->get_coeffs().replace(key,nodeT(coeffT(),true));
3609         	    return continue_recursion(std::vector<bool>(1<<NDIM,false),tensorT(),key);
3610         	}
3611 
3612 
3613         	coeffT sum_coeff=make_sum_coeffs(key);
3614 
3615         	if(leaf_op.post_screening(key,sum_coeff)){
3616         	    result->get_coeffs().replace(key,nodeT(sum_coeff,false));
3617         	    //std::cout << "n=" + std::to_string(key.level()) + " is leaf by post_screening\n";
3618         	    return std::pair<bool,coeffT> (true,coeffT());
3619         	}
3620 
        	// do the conventional error measurement and use the computed child coeffs to determine if they will be leaves
3622         	tensorT children_sum_coeffs=make_childrens_sum_coeffs(key);
3623         	tensorT d=result->filter(children_sum_coeffs);
3624 
        	// use the s coeffs computed from the children since they will be better anyway (these are only the s coeffs, not the d)
3626         	sum_coeff=coeffT(copy(d(result->get_cdata().s0)),result->get_tensor_args());
3627 
        	// delete the s coeffs from the children tensor to get the d coeffs and the error
3629         	d(result->get_cdata().s0)=0.0;
3630         	double error=d.normf();
3631 
3632         	if(error<result->truncate_tol(result->get_thresh(),key)){
3633         	    result->get_coeffs().replace(key,nodeT(sum_coeff,false));
3634         	    //std::cout << "n=" + std::to_string(key.level()) + " is leaf by conventional error measurement (" + std::to_string(error)+ ")\n";
3635         	    return std::pair<bool,coeffT> (true,coeffT());
3636         	}
3637 
        	// at this point the current box will not become a leaf anymore, but we can pre-screen the children;
        	// if no screening is enabled, the child boxes are compared to the downsampled parent boxes: if they are represented well they will be leaves
3640         	std::vector<bool> child_is_leaf(1<<NDIM);
3641         	std::size_t i=0;
3642         	for (KeyChildIterator<NDIM> kit(key); kit; ++kit, ++i) {
3643         	    // post-determination for this child's coeffs
3644         	    coeffT child_coeff=coeffT(copy(children_sum_coeffs(result->child_patch(kit.key()))),
3645 					      result->get_tensor_args());
3646         	    child_is_leaf[i]=leaf_op.post_screening(kit.key(),child_coeff);
3647 
3648         	    // compare to parent sum coeffs (failsafe)
3649         	    child_is_leaf[i]=(child_is_leaf[i] or leaf_op.compare_to_parent(kit.key(),child_coeff,sum_coeff));
3650         	}
3651         	// insert empty sum coeffs for this box and
3652         	// send off the tasks for those children that might not be leaves;
3653         	result->get_coeffs().replace(key,nodeT(coeffT(),true));
3654         	//std::cout << "n=" + std::to_string(key.level()) + " is no leaf\n";
3655         	return continue_recursion(child_is_leaf,children_sum_coeffs,key);
3656             }
3657 
3658             MADNESS_EXCEPTION("you should not be here",1);
3659             return std::pair<bool,coeffT> (true,coeffT());
3660           }
3661 
3662 
3663           /// loop over all children and either insert their sum coeffs or continue the recursion
3664 
3665           /// @param[in]	child_is_leaf	for each child: is it a leaf?
3666           /// @param[in]	coeffs	coefficient tensor with 2^N sum coeffs (=unfiltered NS coeffs)
3667           /// @param[in]	key		the key for the NS coeffs (=parent key of the children)
          /// @return		always std::pair<is_leaf,coeff> = (true,coeffT()): the recursion is continued inside this struct, not outside
          std::pair<bool,coeffT> continue_recursion(const std::vector<bool> child_is_leaf,
3670 						    const tensorT& coeffs, const keyT& key) const {
3671             std::size_t i=0;
3672             for (KeyChildIterator<NDIM> kit(key); kit; ++kit, ++i) {
3673         	keyT child=kit.key();
3674         	bool is_leaf=child_is_leaf[i];
3675 
3676         	if (is_leaf) {
3677         	    // insert the sum coeffs
3678         	    insert_op<T,NDIM> iop(result);
3679         	    iop(child,coeffT(copy(coeffs(result->child_patch(child))),result->get_tensor_args()),is_leaf);
3680         	} else {
3681         	    this_type child_op=this->make_child(child);
3682         	    noop<T,NDIM> no;
3683         	    // spawn activation where child is local
3684         	    ProcessID p=result->get_coeffs().owner(child);
3685 
3686         	    void (implT::*ft)(const Vphi_op_NS<opT,LDIM>&, const noop<T,NDIM>&, const keyT&) const = &implT:: template forward_traverse< Vphi_op_NS<opT,LDIM>, noop<T,NDIM> >;
3687         	    result->task(p, ft, child_op, no, child);
3688         	}
3689             }
            // return empty sum coeffs; also always return is_leaf=true:
3691             // the recursion is continued within this struct, not outside in traverse_tree!
3692             return std::pair<bool,coeffT> (true,coeffT());
3693           }
3694 
3695           /// return the values of the 2-particle potential
3696 
3697           /// @param[in]	key		the key for which the values are requested
3698           /// @return		val_eri	the values in full tensor form
          tensorT eri_values(const keyT& key) const {
3700             tensorT val_eri;
3701             if (eri and eri->is_on_demand()) {
3702         	if (eri->get_functor()->provides_coeff()) {
3703         	    val_eri=eri->coeffs2values(
3704         		key,eri->get_functor()->coeff(key).full_tensor());
3705         	} else {
3706         	    val_eri=tensorT(eri->cdata.vk);
3707         	    eri->fcube(key,*(eri->get_functor()),eri->cdata.quad_x,val_eri);
3708         	}
3709             }
3710             return val_eri;
3711           }
3712 
3713           /// make the sum coeffs for key
          coeffT make_sum_coeffs(const keyT& key) const {
3715             // break key into particles
3716             Key<LDIM> key1, key2;
3717             key.break_apart(key1,key2);
3718 
3719             TensorArgs targs=result->get_tensor_args();
3720             // use the ket coeffs if they are there, or make them by hartree product
3721             const coeffT coeff_ket_NS = (iaket.get_impl())
3722                     			    ? iaket.coeff(key)
3723                     				: outer(iap1.coeff(key1),iap2.coeff(key2),targs);
3724 
3725             coeffT val_potential1, val_potential2;
3726             if (iav1.get_impl()) {
3727         	coeffT tmp=iav1.coeff(key1)(iav1.get_impl()->get_cdata().s0);
3728         	val_potential1=iav1.get_impl()->coeffs2values(key1,tmp);
3729             }
3730             if (iav2.get_impl()) {
3731         	coeffT tmp=iav2.coeff(key2)(iav2.get_impl()->get_cdata().s0);
3732         	val_potential2=iav2.get_impl()->coeffs2values(key2,tmp);
3733             }
3734             coeffT tmp=coeff_ket_NS(result->get_cdata().s0);
3735 
3736             return result->assemble_coefficients(key,tmp,
3737 						 val_potential1,val_potential2,eri_values(key));
3738           }
3739 
3740           /// make the sum coeffs for all children of key
          tensorT make_childrens_sum_coeffs(const keyT& key) const {
3742             // break key into particles
3743             Key<LDIM> key1, key2;
3744             key.break_apart(key1,key2);
3745             TensorArgs targs=result->get_tensor_args();
3746 
3747             // use the ket coeffs if they are there, or make them by hartree product
3748             const coeffT coeff_ket_NS = (iaket.get_impl())
3749                     			    ? iaket.coeff(key)
3750                     				: outer(iap1.coeff(key1),iap2.coeff(key2),targs);
3751 
3752             // get the sum coeffs for all children
3753             const coeffT coeff_ket_unfiltered=result->unfilter(coeff_ket_NS);
3754             const coeffT coeff_v1_unfiltered=(iav1.get_impl())
3755                     			    ? iav1.get_impl()->unfilter(iav1.coeff(key1)) : coeffT();
3756             const coeffT coeff_v2_unfiltered=(iav2.get_impl())
3757                     			    ? iav2.get_impl()->unfilter(iav2.coeff(key2)) : coeffT();
3758 
3759             // result sum coeffs of all children
3760             tensorT s_coeffs(result->cdata.v2k);
3761             for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
3762 
3763         	// break key into particles
3764         	Key<LDIM> child1, child2;
3765         	kit.key().break_apart(child1,child2);
3766 
3767         	// make the values of the potentials for each child
3768         	// transform the child patch of s coeffs to values
3769         	coeffT val_potential1, val_potential2;
3770         	if (iav1.get_impl()) {
3771         	    coeffT tmp=coeff_v1_unfiltered(iav1.get_impl()->child_patch(child1));
3772         	    val_potential1=iav1.get_impl()->coeffs2values(child1,tmp);
3773         	}
3774         	if (iav2.get_impl()) {
3775         	    coeffT tmp=coeff_v2_unfiltered(iav2.get_impl()->child_patch(child2));
3776         	    val_potential2=iav2.get_impl()->coeffs2values(child2,tmp);
3777         	}
3778         	const coeffT coeff_ket=coeff_ket_unfiltered(result->child_patch(kit.key()));
3779 
3780         	// the sum coeffs for this child
3781         	const tensorT val_eri=eri_values(kit.key());
3782         	const coeffT coeff_result=result->assemble_coefficients(kit.key(),coeff_ket,
3783 									val_potential1,val_potential2,val_eri);
3784 
3785         	// accumulate the sum coeffs of the children here
3786         	s_coeffs(result->child_patch(kit.key()))+=coeff_result.full_tensor_copy();
3787             }
3788             return s_coeffs;
3789 
3790           }
3791 
          this_type make_child(const keyT& child) const {
3793 
3794             // break key into particles
3795             Key<LDIM> key1, key2;
3796             child.break_apart(key1,key2);
3797 
3798             return this_type(result,leaf_op,iaket.make_child(child),
3799 			     iap1.make_child(key1),iap2.make_child(key2),
3800 			     iav1.make_child(key1),iav2.make_child(key2),eri);
3801           }
3802 
          Future<this_type> activate() const {
3804             Future<ctT> iaket1=iaket.activate();
3805             Future<ctL> iap11=iap1.activate();
3806             Future<ctL> iap21=iap2.activate();
3807             Future<ctL> iav11=iav1.activate();
3808             Future<ctL> iav21=iav2.activate();
3809             return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
3810 							       &this_type::forward_ctor),result,leaf_op,
3811 					   iaket1,iap11,iap21,iav11,iav21,eri);
3812           }
3813 
          this_type forward_ctor(implT* result1, const opT& leaf_op, const ctT& iaket1,
3815 				 const ctL& iap11, const ctL& iap21, const ctL& iav11, const ctL& iav21,
3816 				 const implT* eri1) {
3817             return this_type(result1,leaf_op,iaket1,iap11,iap21,iav11,iav21,eri1);
3818           }
3819 
3820           /// serialize this (needed for use in recursive_op)
          template <typename Archive> void serialize(const Archive& ar) {
3822             ar & iaket & eri & result & leaf_op & iap1 & iap2 & iav1 & iav2;
3823           }
3824         };
3825 
3826         /// assemble the function V*phi using V and phi given from the functor
3827 
3828         /// this function must have been constructed using the CompositeFunctorInterface.
3829         /// The interface provides one- and two-electron potentials, and the ket, which are
3830         /// assembled to give V*phi.
3831         /// @param[in]  leaf_op  operator to decide if a given node is a leaf node
3832         /// @param[in]  fence   global fence
3833         template<typename opT>
3834         void make_Vphi(const opT& leaf_op, const bool fence=true) {
3835 
3836           const size_t LDIM=3;
3837 
3838           // keep the functor available, but remove it from the result
3839           // result will return false upon is_on_demand(), which is necessary for the
3840           // CoeffTracker to track the parent coeffs correctly for error_leaf_op
3841           std::shared_ptr< FunctionFunctorInterface<T,NDIM> > func2(this->get_functor());
3842           this->unset_functor();
3843 
3844           CompositeFunctorInterface<T,NDIM,LDIM>* func=
3845               dynamic_cast<CompositeFunctorInterface<T,NDIM,LDIM>* >(&(*func2));
3846           MADNESS_ASSERT(func);
3847 
3848           coeffs.clear();
3849           const keyT& key0=cdata.key0;
3850 
3851 
3852           FunctionImpl<T,NDIM>* ket=func->impl_ket.get();
3853           const FunctionImpl<T,NDIM>* eri=func->impl_eri.get();
3854           FunctionImpl<T,LDIM>* v1=func->impl_m1.get();
3855           FunctionImpl<T,LDIM>* v2=func->impl_m2.get();
3856           FunctionImpl<T,LDIM>* p1=func->impl_p1.get();
3857           FunctionImpl<T,LDIM>* p2=func->impl_p2.get();
3858 
3859           if (ket) ket->undo_redundant(false);
3860           if (v1) v1->undo_redundant(false);
3861           if (v2) v2->undo_redundant(false);
3862           if (p1) p1->undo_redundant(false);
3863           if (p2) p2->undo_redundant(false);	// fence here
3864           world.gop.fence();
3865 
3866           if (ket) ket->compress(true,true,false,false);
3867           if (v1) v1->compress(true,true,false,false);
3868           if (v2) v2->compress(true,true,false,false);
3869           if (p1) p1->compress(true,true,false,false);
3870           if (p2) p2->compress(true,true,false,false);	// fence here
3871           world.gop.fence();
3872           small=0;
3873           large=0;
3874 
3875           if (world.rank() == coeffs.owner(key0)) {
3876 
3877               // insert an empty internal node for comparison
3878               this->coeffs.replace(key0,nodeT(coeffT(),true));
3879 
3880               // prepare the CoeffTracker
3881               CoeffTracker<T,NDIM> iaket(ket);
3882               CoeffTracker<T,LDIM> iap1(p1);
3883               CoeffTracker<T,LDIM> iap2(p2);
3884               CoeffTracker<T,LDIM> iav1(v1);
3885               CoeffTracker<T,LDIM> iav2(v2);
3886 
3887               // the operator making the coefficients
3888               typedef Vphi_op_NS<opT,LDIM> coeff_opT;
3889               coeff_opT coeff_op(this,leaf_op,iaket,iap1,iap2,iav1,iav2,eri);
3890 
3891               // this operator simply inserts the coeffs into this' tree
3892               typedef noop<T,NDIM> apply_opT;
3893               apply_opT apply_op;
3894 
3895               woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>,
3896 			coeff_op, apply_op, cdata.key0);
3897           }
3898 
3899           world.gop.fence();
3900 
3901           // remove internal coefficients
3902           this->redundant=true;
3903           this->undo_redundant(false);
3904 
3905           // set right state
3906           this->compressed=false;
3907           this->on_demand=false;
3908           this->redundant=false;
3909           this->nonstandard=false;
3910           if (fence) world.gop.fence();
3911 
3912         }
3913 
3914 
3915 
3916         /// Permute the dimensions of f according to map, result on this
3917         void mapdim(const implT& f, const std::vector<long>& map, bool fence);
3918 
3919 
3920         /// take the average of two functions, similar to: this=0.5*(this+rhs)
3921 
3922         /// works in either basis and also in nonstandard form
3923         void average(const implT& rhs);
3924 
3925         /// change the tensor type of the coefficients in the FunctionNode
3926 
3927         /// @param[in]  targs   target tensor arguments (threshold and full/low rank)
3928         void change_tensor_type1(const TensorArgs& targs, bool fence);
3929 
3930         /// reduce the rank of the coefficients tensors
3931 
3932         /// @param[in]  targs   target tensor arguments (threshold and full/low rank)
3933         void reduce_rank(const TensorArgs& targs, bool fence);
3934 
3935         T eval_cube(Level n, coordT& x, const tensorT& c) const;
3936 
3937         /// Transform sum coefficients at level n to sums+differences at level n-1
3938 
3939         /// Given scaling function coefficients s[n][l][i] and s[n][l+1][i]
3940         /// return the scaling function and wavelet coefficients at the
3941         /// coarser level.  I.e., decompose Vn using Vn = Vn-1 + Wn-1.
3942         /// \code
3943         /// s_i = sum(j) h0_ij*s0_j + h1_ij*s1_j
3944         /// d_i = sum(j) g0_ij*s0_j + g1_ij*s1_j
        /// \endcode
3946         /// Returns a new tensor and has no side effects.  Works for any
3947         /// number of dimensions.
3948         ///
3949         /// No communication involved.
3950         tensorT filter(const tensorT& s) const;
3951 
3952         coeffT filter(const coeffT& s) const;
3953 
3954         ///  Transform sums+differences at level n to sum coefficients at level n+1
3955 
3956         ///  Given scaling function and wavelet coefficients (s and d)
3957         ///  returns the scaling function coefficients at the next finer
3958         ///  level.  I.e., reconstruct Vn using Vn = Vn-1 + Wn-1.
3959         ///  \code
3960         ///  s0 = sum(j) h0_ji*s_j + g0_ji*d_j
3961         ///  s1 = sum(j) h1_ji*s_j + g1_ji*d_j
3962         ///  \endcode
3963         ///  Returns a new tensor and has no side effects
3964         ///
3965         ///  If (sonly) ... then ss is only the scaling function coeff (and
3966         ///  assume the d are zero).  Works for any number of dimensions.
3967         ///
3968         /// No communication involved.
3969         tensorT unfilter(const tensorT& s) const;
3970 
3971         coeffT unfilter(const coeffT& s) const;
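        /// A worked special case of the filter/unfilter relations above (illustration
        /// only, not MADNESS code): for k=1 in one dimension the two-scale matrices
        /// reduce to the Haar coefficients h0=h1=g0=1/sqrt(2), g1=-1/sqrt(2), so the
        /// transform and its inverse are simple sums and differences.
        /// \code
        /// #include <cassert>
        /// #include <cmath>
        /// #include <cstdio>
        ///
        /// int main() {
        ///     const double r = 1.0/std::sqrt(2.0);   // Haar two-scale coefficients
        ///     const double s0 = 3.0, s1 = 5.0;       // child sum coefficients at level n
        ///
        ///     // filter: children at level n -> sum and difference coeffs at level n-1
        ///     const double s = r*s0 + r*s1;          // s_i = sum(j) h0_ij*s0_j + h1_ij*s1_j
        ///     const double d = r*s0 - r*s1;          // d_i = sum(j) g0_ij*s0_j + g1_ij*s1_j
        ///
        ///     // unfilter: reconstruct the children from (s,d)
        ///     const double s0r = r*s + r*d;          // s0 = sum(j) h0_ji*s_j + g0_ji*d_j
        ///     const double s1r = r*s - r*d;          // s1 = sum(j) h1_ji*s_j + g1_ji*d_j
        ///
        ///     assert(std::fabs(s0r - s0) < 1e-12 && std::fabs(s1r - s1) < 1e-12);
        ///     std::printf("s=%g d=%g reconstructed children: %g %g\n", s, d, s0r, s1r);
        /// }
        /// \endcode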
3972 
3973         /// downsample the sum coefficients of level n+1 to sum coeffs on level n
3974 
3975         /// specialization of the filter method, will yield only the sum coefficients
3976         /// @param[in]  key key of level n
3977         /// @param[in]  v   vector of sum coefficients of level n+1
3978         /// @return     sum coefficients on level n in full tensor format
3979         tensorT downsample(const keyT& key, const std::vector< Future<coeffT > >& v) const;
3980 
        /// upsample the sum coefficients of level n to sum coeffs on level n+1
3982 
3983         /// specialization of the unfilter method, will transform only the sum coefficients
3984         /// @param[in]  key     key of level n+1
3985         /// @param[in]  coeff   sum coefficients of level n (does NOT belong to key!!)
        /// @return     sum coefficients on level n+1
3987         coeffT upsample(const keyT& key, const coeffT& coeff) const;
3988 
3989         /// Projects old function into new basis (only in reconstructed form)
3990         void project(const implT& old, bool fence);
3991 
3992         struct true_refine_test {
            bool operator()(const implT* f, const keyT& key, const nodeT& t) const {
3994                 return true;
3995             }
            template <typename Archive> void serialize(Archive& ar) {}
3997         };
3998 
3999         template <typename opT>
        void refine_op(const opT& op, const keyT& key) {
4001             // Must allow for someone already having autorefined the coeffs
4002             // and we get a write accessor just in case they are already executing
4003             typename dcT::accessor acc;
4004             const auto found = coeffs.find(acc,key);
4005             MADNESS_ASSERT(found);
4006             nodeT& node = acc->second;
4007             if (node.has_coeff() && key.level() < max_refine_level && op(this, key, node)) {
4008                 coeffT d(cdata.v2k,targs);
4009                 d(cdata.s0) += copy(node.coeff());
4010                 d = unfilter(d);
4011                 node.clear_coeff();
4012                 node.set_has_children(true);
4013                 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
4014                     const keyT& child = kit.key();
4015                     coeffT ss = copy(d(child_patch(child)));
4016                     ss.reduce_rank(targs.thresh);
4017                     //                    coeffs.replace(child,nodeT(ss,-1.0,false).node_to_low_rank());
4018                     coeffs.replace(child,nodeT(ss,-1.0,false));
4019                     // Note value -1.0 for norm tree to indicate result of refinement
4020                 }
4021             }
4022         }
4023 
4024         template <typename opT>
        void refine_spawn(const opT& op, const keyT& key) {
4026             nodeT& node = coeffs.find(key).get()->second;
4027             if (node.has_children()) {
4028                 for (KeyChildIterator<NDIM> kit(key); kit; ++kit)
4029                     woT::task(coeffs.owner(kit.key()), &implT:: template refine_spawn<opT>, op, kit.key(), TaskAttributes::hipri());
4030             }
4031             else {
4032                 woT::task(coeffs.owner(key), &implT:: template refine_op<opT>, op, key);
4033             }
4034         }
4035 
4036         // Refine in real space according to local user-defined criterion
4037         template <typename opT>
        void refine(const opT& op, bool fence) {
4039             if (world.rank() == coeffs.owner(cdata.key0))
4040                 woT::task(coeffs.owner(cdata.key0), &implT:: template refine_spawn<opT>, op, cdata.key0, TaskAttributes::hipri());
4041             if (fence)
4042                 world.gop.fence();
4043         }
4044 
4045         bool exists_and_has_children(const keyT& key) const;
4046 
4047         bool exists_and_is_leaf(const keyT& key) const;
4048 
4049 
4050         void broaden_op(const keyT& key, const std::vector< Future <bool> >& v);
4051 
4052         // For each local node sets value of norm tree to 0.0
4053         void zero_norm_tree();
4054 
4055         // Broaden tree
4056         void broaden(std::vector<bool> is_periodic, bool fence);
4057 
4058         /// sum all the contributions from all scales after applying an operator in mod-NS form
4059         void trickle_down(bool fence);
4060 
4061         /// sum all the contributions from all scales after applying an operator in mod-NS form
4062 
4063         /// cf reconstruct_op
4064         void trickle_down_op(const keyT& key, const coeffT& s);
4065 
4066         void reconstruct(bool fence);
4067 
4068         // Invoked on node where key is local
4069         //        void reconstruct_op(const keyT& key, const tensorT& s);
4070         void reconstruct_op(const keyT& key, const coeffT& s);
4071 
4072         /// compress the wave function
4073 
4074         /// after application there will be sum coefficients at the root level,
4075         /// and difference coefficients at all other levels; furthermore:
4076         /// @param[in] nonstandard	keep sum coeffs at all other levels, except leaves
4077         /// @param[in] keepleaves	keep sum coeffs (but no diff coeffs) at leaves
4078         /// @param[in] redundant    keep only sum coeffs at all levels, discard difference coeffs
4079         void compress(bool nonstandard, bool keepleaves, bool redundant, bool fence);
4080 
4081         // Invoked on node where key is local
4082         Future<coeffT > compress_spawn(const keyT& key, bool nonstandard, bool keepleaves, bool redundant);
4083 
4084         /// convert this to redundant, i.e. have sum coefficients on all levels
4085         void make_redundant(const bool fence);
4086 
4087         /// convert this from redundant to standard reconstructed form
4088         void undo_redundant(const bool fence);
4089 
4090 
4091         /// compute for each FunctionNode the norm of the function inside that node
4092         void norm_tree(bool fence);
4093 
4094         double norm_tree_op(const keyT& key, const std::vector< Future<double> >& v);
4095 
4096         Future<double> norm_tree_spawn(const keyT& key);
4097 
4098         /// truncate using a tree in reconstructed form
4099 
4100         /// must be invoked where key is local
4101         Future<coeffT> truncate_reconstructed_spawn(const keyT& key, const double tol);
4102 
4103         /// given the sum coefficients of all children, truncate or not
4104 
        /// @return     new sum coefficients (empty if internal, non-empty if new leaf); might delete its children
4106         coeffT truncate_reconstructed_op(const keyT& key, const std::vector< Future<coeffT > >& v, const double tol);
4107 
4108         /// calculate the wavelet coefficients using the sum coefficients of all child nodes
4109 
4110         /// @param[in] key 	this's key
4111         /// @param[in] v 	sum coefficients of the child nodes
4112         /// @param[in] nonstandard  keep the sum coefficients with the wavelet coefficients
4113         /// @param[in] redundant    keep only the sum coefficients, discard the wavelet coefficients
4114         /// @return 		the sum coefficients
4115         coeffT compress_op(const keyT& key, const std::vector< Future<coeffT > >& v, bool nonstandard, bool redundant);
4116 
4117 
4118         /// similar to compress_op, but insert only the sum coefficients in the tree
4119 
4120         /// @param[in] key  this's key
4121         /// @param[in] v    sum coefficients of the child nodes
4122         /// @return         the sum coefficients
4123         coeffT make_redundant_op(const keyT& key, const std::vector< Future<coeffT > >& v);
4124 
4125         /// Changes non-standard compressed form to standard compressed form
4126         void standard(bool fence);
4127 
4128         /// Changes non-standard compressed form to standard compressed form
4129         struct do_standard {
4130             typedef Range<typename dcT::iterator> rangeT;
4131 
            // the FunctionImpl whose nodes are converted to standard compressed form
4133             implT* impl;
4134 
            // constructors
            do_standard() {}
            do_standard(implT* impl) : impl(impl) {}
4138 
4139             //
            bool operator()(typename rangeT::iterator& it) const {
4141 
4142                 const keyT& key = it->first;
4143                 nodeT& node = it->second;
4144                 if (key.level()> 0 && node.has_coeff()) {
4145                     if (node.has_children()) {
4146                         // Zero out scaling coeffs
4147                         node.coeff()(impl->cdata.s0)=0.0;
4148                         node.reduceRank(impl->targs.thresh);
4149                     } else {
4150                         // Deleting both scaling and wavelet coeffs
4151                         node.clear_coeff();
4152                     }
4153                 }
4154                 return true;
4155             }
            template <typename Archive> void serialize(const Archive& ar) {
4157                 MADNESS_EXCEPTION("no serialization of do_standard",1);
4158             }
4159         };
4160 
4161 
4162         /// laziness
4163         template<size_t OPDIM>
4164         struct do_op_args {
4165             Key<OPDIM> key,d;
4166             keyT dest;
4167             double tol, fac, cnorm;
            do_op_args() {}
            do_op_args(const Key<OPDIM>& key, const Key<OPDIM>& d, const keyT& dest, double tol, double fac, double cnorm)
4170                 : key(key), d(d), dest(dest), tol(tol), fac(fac), cnorm(cnorm) {}
4171             template <class Archive>
            void serialize(Archive& ar) {
4173                 ar & archive::wrap_opaque(this,1);
4174             }
4175         };
4176 
4177         /// for fine-grain parallelism: call the apply method of an operator in a separate task
4178 
4179         /// @param[in]  op      the operator working on our function
4180         /// @param[in]  c       full rank tensor holding the NS coefficients
4181         /// @param[in]  args    laziness holding norm of the coefficients, displacement, destination, ..
4182         template <typename opT, typename R, size_t OPDIM>
        void do_apply_kernel(const opT* op, const Tensor<R>& c, const do_op_args<OPDIM>& args) {
4184 
4185             tensorT result = op->apply(args.key, args.d, c, args.tol/args.fac/args.cnorm);
4186 
4187             // Screen here to reduce communication cost of negligible data
4188             // and also to ensure we don't needlessly widen the tree when
4189             // applying the operator
4190             if (result.normf()> 0.3*args.tol/args.fac) {
4191                 Future<double> time=coeffs.task(args.dest, &nodeT::accumulate2, result, coeffs, args.dest, TaskAttributes::hipri());
4192                 //woT::task(world.rank(),&implT::accumulate_timer,time,TaskAttributes::hipri());
                // UGLY BUT ADDED THE OPTIMIZATION BACK IN HERE EXPLICITLY
4194                 if (args.dest == world.rank()) {
4195                     coeffs.send(args.dest, &nodeT::accumulate, result, coeffs, args.dest);
4196                 }
4197                 else {
4198                     coeffs.task(args.dest, &nodeT::accumulate, result, coeffs, args.dest, TaskAttributes::hipri());
4199                 }
4200             }
4201         }
4202 
4203         /// same as do_apply_kernel, but use full rank tensors as input and low rank tensors as output
4204 
4205         /// @param[in]  op      the operator working on our function
4206         /// @param[in]  c       full rank tensor holding the NS coefficients
4207         /// @param[in]  args    laziness holding norm of the coefficients, displacement, destination, ..
4208         /// @param[in]  apply_targs TensorArgs with tightened threshold for accumulation
4209         /// @return     nothing, but accumulate the result tensor into the destination node
4210         template <typename opT, typename R, size_t OPDIM>
        double do_apply_kernel2(const opT* op, const Tensor<R>& c, const do_op_args<OPDIM>& args,
4212                                 const TensorArgs& apply_targs) {
4213 
4214             tensorT result_full = op->apply(args.key, args.d, c, args.tol/args.fac/args.cnorm);
4215             const double norm=result_full.normf();
4216 
4217             // Screen here to reduce communication cost of negligible data
4218             // and also to ensure we don't needlessly widen the tree when
4219             // applying the operator
4220             // OPTIMIZATION NEEDED HERE ... CHANGING THIS TO TASK NOT SEND REMOVED
4221             // BUILTIN OPTIMIZATION TO SHORTCIRCUIT MSG IF DATA IS LOCAL
4222             if (norm > 0.3*args.tol/args.fac) {
4223 
4224                 small++;
4225                 //double cpu0=cpu_time();
4226                 coeffT result=coeffT(result_full,apply_targs);
4227                 MADNESS_ASSERT(result.tensor_type()==TT_FULL or result.tensor_type()==TT_2D);
4228                 //double cpu1=cpu_time();
4229                 //timer_lr_result.accumulate(cpu1-cpu0);
4230 
4231                 Future<double> time=coeffs.task(args.dest, &nodeT::accumulate, result, coeffs, args.dest, apply_targs,
4232                                                 TaskAttributes::hipri());
4233 
4234                 //woT::task(world.rank(),&implT::accumulate_timer,time,TaskAttributes::hipri());
4235             }
4236             return norm;
4237         }
4238 
4239 
4240 
4241         /// same as do_apply_kernel2, but use low rank tensors as input and low rank tensors as output
4242 
4243         /// @param[in]  op      the operator working on our function
4244         /// @param[in]  coeff   full rank tensor holding the NS coefficients
4245         /// @param[in]  args    laziness holding norm of the coefficients, displacement, destination, ..
4246         /// @param[in]  apply_targs TensorArgs with tightened threshold for accumulation
4247         /// @return     nothing, but accumulate the result tensor into the destination node
4248         template <typename opT, typename R, size_t OPDIM>
        double do_apply_kernel3(const opT* op, const GenTensor<R>& coeff, const do_op_args<OPDIM>& args,
4250                                 const TensorArgs& apply_targs) {
4251 
4252             coeffT result;
4253             if (2*OPDIM==NDIM) result= op->apply2_lowdim(args.key, args.d, coeff,
4254                     args.tol/args.fac/args.cnorm, args.tol/args.fac);
4255             if (OPDIM==NDIM) result = op->apply2(args.key, args.d, coeff,
4256                     args.tol/args.fac/args.cnorm, args.tol/args.fac);
4257 
4258             const double result_norm=result.svd_normf();
4259 
4260             if (result_norm> 0.3*args.tol/args.fac) {
4261                 small++;
4262 
4263                 double cpu0=cpu_time();
4264                 if (targs.tt!=result.tensor_type()) result=result.convert(targs);
4265                 double cpu1=cpu_time();
4266                 timer_lr_result.accumulate(cpu1-cpu0);
4267 
4268                 // accumulate also expects result in SVD form
4269                 Future<double> time=coeffs.task(args.dest, &nodeT::accumulate, result, coeffs, args.dest, apply_targs,
4270                                                 TaskAttributes::hipri());
4271                 woT::task(world.rank(),&implT::accumulate_timer,time,TaskAttributes::hipri());
4272 
4273             }
4274             return result_norm;
4275 
4276         }
4277 
4278         // volume of n-dimensional sphere of radius R
        double vol_nsphere(int n, double R) {
4280             return std::pow(madness::constants::pi,n*0.5)*std::pow(R,n)/std::tgamma(1+0.5*n);
4281         }
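        // sanity check (illustrative numbers): vol_nsphere(2,R) = pi*R^2, and
        // vol_nsphere(3,1.0) = pi^1.5/tgamma(2.5) ~= 5.568/1.329 ~= 4.189 = 4*pi/3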
4282 
4283 
4284         /// apply an operator on the coeffs c (at node key)
4285 
4286         /// the result is accumulated inplace to this's tree at various FunctionNodes
4287         /// @param[in] op	the operator to act on the source function
4288         /// @param[in] key	key of the source FunctionNode of f which is processed
4289         /// @param[in] c	coeffs of the FunctionNode of f which is processed
4290         template <typename opT, typename R>
        void do_apply(const opT* op, const keyT& key, const Tensor<R>& c) {
4292             PROFILE_MEMBER_FUNC(FunctionImpl);
4293 
            // The working assumption here WAS that the operator is
            // isotropic and monotonically decreasing with distance.
            // However, now we are using derivative Gaussian expansions
            // (and also non-cubic boxes), so isotropy is violated.
            // While not strictly monotonically decreasing, the derivative
            // Gaussian is still such that once it becomes negligible
            // we are in the asymptotic region.
4301 
4302             typedef typename opT::keyT opkeyT;
4303             static const size_t opdim=opT::opdim;
4304             const opkeyT source=op->get_source_key(key);
4305 
4306 
4307             // Tuning here is based on observation that with
4308             // sufficiently high-order wavelet relative to the
4309             // precision, that only nearest neighbor boxes contribute,
4310             // whereas for low-order wavelets more neighbors will
4311             // contribute.  Sufficiently high is picked as
4312             // k>=2-log10(eps) which is our empirical rule for
4313             // efficiency/accuracy and code instrumentation has
4314             // previously indicated that (in 3D) just unit
4315             // displacements are invoked.  The error decays as R^-(k+1),
4316             // and the number of boxes increases as R^d.
4317             //
4318             // Fac is the expected number of contributions to a given
4319             // box, so the error permitted per contribution will be
4320             // tol/fac
4321 
4322             // radius of shell (nearest neighbor is diameter of 3 boxes, so radius=1.5)
4323             double radius = 1.5 + 0.33*std::max(0.0,2-std::log10(thresh)-k); // 0.33 was 0.5
4324             double fac = vol_nsphere(NDIM, radius);
4325             //previously fac=10.0 selected empirically constrained by qmprop
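            // illustrative numbers only: for thresh=1e-4, k=8, NDIM=3 we get
            // 2-log10(1e-4)-k = 6-8 < 0, hence radius=1.5 and fac=vol_nsphere(3,1.5) ~= 14.1;
            // a lower order k=5 gives radius = 1.5+0.33 ~= 1.83 and fac ~= 25.7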
4326 
4327             double cnorm = c.normf();
4328 
            const std::vector<opkeyT>& disp = op->get_disp(key.level()); // list of displacements sorted in order of increasing distance
4330             const std::vector<bool> is_periodic(NDIM,false); // Periodic sum is already done when making rnlp
4331 	    int ndone=1;	// Counts #done at each distance
4332 	    uint64_t distsq = 99999999999999;
4333             for (typename std::vector<opkeyT>::const_iterator it=disp.begin(); it != disp.end(); ++it) {
4334 	        keyT d;
4335                 Key<NDIM-opdim> nullkey(key.level());
4336                 if (op->particle()==1) d=it->merge_with(nullkey);
4337                 if (op->particle()==2) d=nullkey.merge_with(*it);
4338 
4339 		uint64_t dsq = d.distsq();
4340 		if (dsq != distsq) { // Moved to next shell of neighbors
4341 		    if (ndone == 0 && dsq > 1) {
4342 		        // Have at least done the input box and all first
4343 		        // nearest neighbors, and for all of the last set
4344 		        // of neighbors had no contribution.  Thus,
4345 		        // assuming monotonic decrease, we are done.
4346 		        break;
4347 		    }
4348 		    ndone = 0;
4349 		    distsq = dsq;
4350 		}
4351 
4352                 keyT dest = neighbor(key, d, is_periodic);
4353                 if (dest.is_valid()) {
4354                     double opnorm = op->norm(key.level(), *it, source);
4355                     double tol = truncate_tol(thresh, key);
4356 
4357                     if (cnorm*opnorm> tol/fac) {
4358 		        ndone++;
4359 		        tensorT result = op->apply(source, *it, c, tol/fac/cnorm);
4360 			if (result.normf() > 0.3*tol/fac) {
4361 			  if (coeffs.is_local(dest))
4362 			      coeffs.send(dest, &nodeT::accumulate2, result, coeffs, dest);
4363 			  else
4364   			      coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest);
4365                         }
4366                     }
4367                 }
4368             }
4369         }
4370 
4371 
4372         /// apply an operator on f to return this
4373         template <typename opT, typename R>
        void apply(opT& op, const FunctionImpl<R,NDIM>& f, bool fence) {
4375             PROFILE_MEMBER_FUNC(FunctionImpl);
4376             MADNESS_ASSERT(!op.modified());
4377             typename dcT::const_iterator end = f.coeffs.end();
4378             for (typename dcT::const_iterator it=f.coeffs.begin(); it!=end; ++it) {
4379                 // looping through all the coefficients in the source
4380                 const keyT& key = it->first;
4381                 const FunctionNode<R,NDIM>& node = it->second;
4382                 if (node.has_coeff()) {
4383                     if (node.coeff().dim(0) != k || op.doleaves) {
4384                         ProcessID p = FunctionDefaults<NDIM>::get_apply_randomize() ? world.random_proc() : coeffs.owner(key);
4385 //                        woT::task(p, &implT:: template do_apply<opT,R>, &op, key, node.coeff()); //.full_tensor_copy() ????? why copy ????
4386                         woT::task(p, &implT:: template do_apply<opT,R>, &op, key, node.coeff().reconstruct_tensor());
4387                     }
4388                 }
4389             }
4390             if (fence)
4391                 world.gop.fence();
4392 
4393             this->compressed=true;
4394             this->nonstandard=true;
4395             this->redundant=false;
4396 
4397         }
4398 
4399 
4400 
4401         /// apply an operator on the coeffs c (at node key)
4402 
4403         /// invoked by result; the result is accumulated inplace to this's tree at various FunctionNodes
4404         /// @param[in] op     the operator to act on the source function
4405         /// @param[in] key    key of the source FunctionNode of f which is processed (see "source")
4406         /// @param[in] coeff  coeffs of FunctionNode being processed
4407         /// @param[in] do_kernel	true: do the 0-disp only; false: do everything but the kernel
4408         /// @return	   max norm, and will modify or include new nodes in this' tree
4409         template <typename opT, typename R>
        double do_apply_directed_screening(const opT* op, const keyT& key, const coeffT& coeff,
4411                                            const bool& do_kernel) {
4412             PROFILE_MEMBER_FUNC(FunctionImpl);
4413             // insert timer here
4414             typedef typename opT::keyT opkeyT;
4415 
4416             // screening: contains all displacement keys that had small result norms
4417             std::list<opkeyT> blacklist;
4418 
4419             static const size_t opdim=opT::opdim;
4420             Key<NDIM-opdim> nullkey(key.level());
4421 
4422             // source is that part of key that corresponds to those dimensions being processed
4423             const opkeyT source=op->get_source_key(key);
4424 
4425             const double tol = truncate_tol(thresh, key);
4426 
            // fac is the square root of the number of contributing neighbors (first shell)
4428             double fac=std::pow(3,NDIM*0.5);
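            // illustrative numbers: the first shell holds 3^NDIM boxes, so for a
            // 6-dimensional pair function fac = sqrt(3^6) = 27, and for NDIM=3 fac = sqrt(27) ~= 5.2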
4429             double cnorm = coeff.normf();
4430 
4431             // for accumulation: keep slightly tighter TensorArgs
4432             TensorArgs apply_targs(targs);
4433             apply_targs.thresh=tol/fac*0.03;
4434 
4435             double maxnorm=0.0;
4436 
4437             // for the kernel it may be more efficient to do the convolution in full rank
4438             tensorT coeff_full;
4439             // for partial application (exchange operator) it's more efficient to
4440             // do SVD tensors instead of tensortrains, because addition in apply
4441             // can be done in full form for the specific particle
4442             coeffT coeff_SVD=coeff.convert(TensorArgs(-1.0,TT_2D));
4443 #ifdef USE_LRT
4444             coeff_SVD.impl.svd->orthonormalize(tol*LowRankTensor<T>::fac_reduce());
4445 #endif
4446 
4447             const std::vector<opkeyT>& disp = op->get_disp(key.level());
4448             const std::vector<bool> is_periodic(NDIM,false); // Periodic sum is already done when making rnlp
4449 
4450             for (typename std::vector<opkeyT>::const_iterator it=disp.begin(); it != disp.end(); ++it) {
4451                 const opkeyT& d = *it;
4452 
4453                 const int shell=d.distsq();
4454                 if (do_kernel and (shell>0)) break;
4455                 if ((not do_kernel) and (shell==0)) continue;
4456 
4457                 keyT disp1;
4458                 if (op->particle()==1) disp1=it->merge_with(nullkey);
4459                 else if (op->particle()==2) disp1=nullkey.merge_with(*it);
4460                 else {
                    MADNESS_EXCEPTION("confused particle in operator",1);
4462                 }

                keyT dest = neighbor(key, disp1, is_periodic);

                if (not dest.is_valid()) continue;

                // directed screening
                // working assumption here is that the operator is isotropic and
                // monotonically decreasing with distance
                bool screened=false;
                typename std::list<opkeyT>::const_iterator it2;
                for (it2=blacklist.begin(); it2!=blacklist.end(); it2++) {
                    if (d.is_farther_out_than(*it2)) {
                        screened=true;
                        break;
                    }
                }
                if (not screened) {

                    double opnorm = op->norm(key.level(), d, source);
                    double norm=0.0;

                    if (cnorm*opnorm> tol/fac) {

                        double cost_ratio=op->estimate_costs(source, d, coeff_SVD, tol/fac/cnorm, tol/fac);
                        //                        cost_ratio=1.5;     // force low rank
                        //                        cost_ratio=0.5;     // force full rank

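                        // cost_ratio semantics as used below: <=0 means this contribution
                        // is negligible and is skipped; <1 selects the full-rank kernel
                        // (do_apply_kernel2); >=1 selects the low-rank kernel (do_apply_kernel3)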
                        if (cost_ratio>0.0) {

                            do_op_args<opdim> args(source, d, dest, tol, fac, cnorm);
                            norm=0.0;
                            if (cost_ratio<1.0) {
                                if (not coeff_full.has_data()) coeff_full=coeff.full_tensor_copy();
                                norm=do_apply_kernel2(op, coeff_full,args,apply_targs);
                            } else {
                                if (2*opdim==NDIM) {    // apply operator on one particle only
                                    norm=do_apply_kernel3(op,coeff_SVD,args,apply_targs);
                                } else {
                                    norm=do_apply_kernel3(op,coeff,args,apply_targs);
                                }
                            }
                            maxnorm=std::max(norm,maxnorm);
                        }

                    } else if (shell >= 12) {
                        break; // Assumes monotonic decay beyond nearest neighbor
                    }
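                    // displacements whose contribution turned out to be negligible are
                    // blacklisted; any displacement farther out than a blacklisted one is
                    // screened out above (directed screening)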
                    if (norm<0.3*tol/fac) blacklist.push_back(d);
                }
            }
            return maxnorm;
        }


        /// similar to apply, but for low rank coeffs
        template <typename opT, typename R>
        void apply_source_driven(opT& op, const FunctionImpl<R,NDIM>& f, bool fence) {
            PROFILE_MEMBER_FUNC(FunctionImpl);

            MADNESS_ASSERT(not op.modified());
            // looping through all the coefficients of the source f
            typename dcT::const_iterator end = f.get_coeffs().end();
            for (typename dcT::const_iterator it=f.get_coeffs().begin(); it!=end; ++it) {

                const keyT& key = it->first;
                const coeffT& coeff = it->second.coeff();

                if (coeff.has_data() and (coeff.rank()!=0)) {
                    ProcessID p = FunctionDefaults<NDIM>::get_apply_randomize() ? world.random_proc() : coeffs.owner(key);
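                    // two tasks per source node: the first handles only the zero-displacement
                    // (kernel) contribution, the second handles all other displacements
                    // (cf. the do_kernel flag of do_apply_directed_screening)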
                    woT::task(p, &implT:: template do_apply_directed_screening<opT,R>, &op, key, coeff, true);
                    woT::task(p, &implT:: template do_apply_directed_screening<opT,R>, &op, key, coeff, false);
                }
            }
            if (fence) world.gop.fence();
        }

        /// after apply we need to do some cleanup
        double finalize_apply(const bool fence=true);

        /// traverse a non-existing tree, make its coeffs and apply an operator

        /// invoked by result
        /// here we use the fact that the hi-dim NS coefficients on all scales are exactly
        /// the outer product of the underlying low-dim functions (also in NS form),
        /// so we don't need to construct the full hi-dim tree and then turn it into NS form.
        /// @param[in]  apply_op the operator acting on the NS tree
        /// @param[in]  fimpl    the funcimpl of the function of particle 1
        /// @param[in]  gimpl    the funcimpl of the function of particle 2
        template<typename opT, std::size_t LDIM>
        void recursive_apply(opT& apply_op, const FunctionImpl<T,LDIM>* fimpl,
                             const FunctionImpl<T,LDIM>* gimpl, const bool fence) {

            //print("IN RECUR2");
            const keyT& key0=cdata.key0;

            if (world.rank() == coeffs.owner(key0)) {

                CoeffTracker<T,LDIM> ff(fimpl);
                CoeffTracker<T,LDIM> gg(gimpl);

                typedef recursive_apply_op<opT,LDIM> coeff_opT;
                coeff_opT coeff_op(this,ff,gg,&apply_op);

                typedef noop<T,NDIM> apply_opT;
                apply_opT apply_op;
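                // note: this noop deliberately shadows the apply_op parameter; the operator
                // itself is already captured by coeff_op above, and the traversal applies
                // no additional per-node operation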

                ProcessID p= coeffs.owner(key0);
                woT::task(p, &implT:: template forward_traverse<coeff_opT,apply_opT>, coeff_op, apply_op, key0);

            }
            if (fence) world.gop.fence();
        }

        /// recursive part of recursive_apply
        template<typename opT, std::size_t LDIM>
        struct recursive_apply_op {
            bool randomize() const {return true;}

            typedef recursive_apply_op<opT,LDIM> this_type;

            implT* result;
            CoeffTracker<T,LDIM> iaf;
            CoeffTracker<T,LDIM> iag;
            opT* apply_op;

            // ctor
            recursive_apply_op() {}
            recursive_apply_op(implT* result,
                               const CoeffTracker<T,LDIM>& iaf, const CoeffTracker<T,LDIM>& iag,
                               const opT* apply_op) : result(result), iaf(iaf), iag(iag), apply_op(apply_op)
            {
                MADNESS_ASSERT(LDIM+LDIM==NDIM);
            }
            recursive_apply_op(const recursive_apply_op& other) : result(other.result), iaf(other.iaf),
                                                                  iag(other.iag), apply_op(other.apply_op) {}


            /// make the NS-coefficients and send off the application of the operator

            /// @return     a std::pair<bool,coeffT>(is_leaf,coeff)
            std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const {

                // World& world=result->world;
                // break key into particles (these are the child keys; datum1/2 hold the parent keys)
                Key<LDIM> key1,key2;
                key.break_apart(key1,key2);

                // the lo-dim functions should be in full tensor form
                const tensorT fcoeff=iaf.coeff(key1).full_tensor();
                const tensorT gcoeff=iag.coeff(key2).full_tensor();

                // would this be a leaf node? If so, its sum coeffs have already been
                // accounted for by the parent node's wavelet coeffs, so we do not
                // process it any further.
                hartree_leaf_op<T,NDIM> leaf_op(result,result->get_k());
                bool is_leaf=leaf_op(key,fcoeff,gcoeff);

                if (not is_leaf) {
                    // new coeffs are simply the hartree/kronecker/outer product
                    const std::vector<Slice>& s0=iaf.get_impl()->cdata.s0;
                    const coeffT coeff = (apply_op->modified())
                        ? outer(copy(fcoeff(s0)),copy(gcoeff(s0)),result->targs)
                        : outer(fcoeff,gcoeff,result->targs);

                    // now send off the application
                    tensorT coeff_full;
                    ProcessID p=result->world.rank();
                    double norm0=result->do_apply_directed_screening<opT,T>(apply_op, key, coeff, true);

                    result->task(p,&implT:: template do_apply_directed_screening<opT,T>,
                                 apply_op,key,coeff,false);

                    return finalize(norm0,key,coeff);

                } else {
                    return std::pair<bool,coeffT> (is_leaf,coeffT());
                }
            }

            /// sole purpose is to wait for the kernel norm, wrap it and send it back to the caller
            std::pair<bool,coeffT> finalize(const double kernel_norm, const keyT& key,
                                            const coeffT& coeff) const {
                const double thresh=result->get_thresh()*0.1;
                bool is_leaf=(kernel_norm<result->truncate_tol(thresh,key));
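                // nodes on levels 0 and 1 are never treated as leaves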
                if (key.level()<2) is_leaf=false;
                return std::pair<bool,coeffT> (is_leaf,coeff);
            }


            this_type make_child(const keyT& child) const {

                // break key into particles
                Key<LDIM> key1, key2;
                child.break_apart(key1,key2);

                return this_type(result,iaf.make_child(key1),iag.make_child(key2),apply_op);
            }

            Future<this_type> activate() const {
                Future<CoeffTracker<T,LDIM> > f1=iaf.activate();
                Future<CoeffTracker<T,LDIM> > g1=iag.activate();
                return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
                                               &this_type::forward_ctor),result,f1,g1,apply_op);
            }

            this_type forward_ctor(implT* r, const CoeffTracker<T,LDIM>& f1, const CoeffTracker<T,LDIM>& g1,
                                   const opT* apply_op1) {
                return this_type(r,f1,g1,apply_op1);
            }

            template <typename Archive> void serialize(const Archive& ar) {
                ar & result & iaf & iag & apply_op;
            }
        };

        /// traverse an existing tree and apply an operator

        /// invoked by result
        /// @param[in]  apply_op the operator acting on the NS tree
        /// @param[in]  fimpl    the funcimpl of the source function
        /// @param[in]  rimpl    a dummy function for recursive_op to insert data
        template<typename opT>
        void recursive_apply(opT& apply_op, const implT* fimpl, implT* rimpl, const bool fence) {

            print("IN RECUR1");

            const keyT& key0=cdata.key0;

            if (world.rank() == coeffs.owner(key0)) {

                typedef recursive_apply_op2<opT> coeff_opT;
                coeff_opT coeff_op(this,fimpl,&apply_op);

                typedef noop<T,NDIM> apply_opT;
                apply_opT apply_op;
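                // as above, this noop deliberately shadows the apply_op parameter; the
                // operator is carried by coeff_op, and the traversal applies no extra
                // per-node operation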

                woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>,
                          coeff_op, apply_op, cdata.key0);

            }
            if (fence) world.gop.fence();
        }

        /// recursive part of recursive_apply
        template<typename opT>
        struct recursive_apply_op2 {
            bool randomize() const {return true;}

            typedef recursive_apply_op2<opT> this_type;
            typedef CoeffTracker<T,NDIM> ctT;
            typedef std::pair<bool,coeffT> argT;

            mutable implT* result;
            ctT iaf;                ///< need this for randomization
            const opT* apply_op;

            // ctor
            recursive_apply_op2() {}
            recursive_apply_op2(implT* result, const ctT& iaf, const opT* apply_op)
                : result(result), iaf(iaf), apply_op(apply_op) {}

            recursive_apply_op2(const recursive_apply_op2& other) : result(other.result),
                                                                    iaf(other.iaf), apply_op(other.apply_op) {}


            /// send off the application of the operator

            /// the first (core) neighbor (i.e. the box itself) is processed
            /// immediately, all other ones are shoved into the taskq
            /// @return     a pair<bool,coeffT>(is_leaf,coeff)
            argT operator()(const Key<NDIM>& key) const {

                const coeffT& coeff=iaf.coeff();

                if (coeff.has_data()) {

                    // now send off the application for all neighbor boxes
                    ProcessID p=result->world.rank();
                    result->task(p,&implT:: template do_apply_directed_screening<opT,T>,
                                 apply_op, key, coeff, false);

                    // process the core box
                    double norm0=result->do_apply_directed_screening<opT,T>(apply_op,key,coeff,true);

                    if (iaf.is_leaf()) return argT(true,coeff);
                    return finalize(norm0,key,coeff,result);

                } else {
                    const bool is_leaf=true;
                    return argT(is_leaf,coeffT());
                }
            }

            /// sole purpose is to wait for the kernel norm, wrap it and send it back to the caller
            argT finalize(const double kernel_norm, const keyT& key,
                          const coeffT& coeff, const implT* r) const {
                const double thresh=r->get_thresh()*0.1;
                bool is_leaf=(kernel_norm<r->truncate_tol(thresh,key));
                if (key.level()<2) is_leaf=false;
                return argT(is_leaf,coeff);
            }


            this_type make_child(const keyT& child) const {
                return this_type(result,iaf.make_child(child),apply_op);
            }

            /// retrieve the coefficients (parent coeffs might be remote)
            Future<this_type> activate() const {
                Future<ctT> f1=iaf.activate();

//                Future<ctL> g1=g.activate();
//                return h->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
//                                          &this_type::forward_ctor),h,f1,g1,particle);

                return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
                                               &this_type::forward_ctor),result,f1,apply_op);
            }

            /// taskq-compatible ctor
            this_type forward_ctor(implT* result1, const ctT& iaf1, const opT* apply_op1) {
                return this_type(result1,iaf1,apply_op1);
            }

            template <typename Archive> void serialize(const Archive& ar) {
                ar & result & iaf & apply_op;
            }
        };

        /// Returns the square of the error norm in the box labeled by key

        /// Assumed to be invoked locally but it would be easy to eliminate
        /// this assumption
        template <typename opT>
        double err_box(const keyT& key, const nodeT& node, const opT& func,
                       int npt, const Tensor<double>& qx, const Tensor<double>& quad_phit,
                       const Tensor<double>& quad_phiw) const {

            std::vector<long> vq(NDIM);
            for (std::size_t i=0; i<NDIM; ++i)
                vq[i] = npt;
            tensorT fval(vq,false), work(vq,false), result(vq,false);

            // Compute the "exact" function in this volume at npt points
            // where npt is usually this->npt+1.
            fcube(key, func, qx, fval);

            // Transform into the scaling function basis of order npt
            double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
            fval = fast_transform(fval,quad_phiw,result,work).scale(scale);

            // Subtract to get the error ... the original coeffs are in the order k
            // basis but we just computed the coeffs in the order npt(=k+1) basis
            // so we can either use slices or an iterator macro.
            const tensorT coeff = node.coeff().full_tensor_copy();
            ITERATOR(coeff,fval(IND)-=coeff(IND););
            // flo note: we do want to keep a full tensor here!

            // Compute the norm of what remains
            double err = fval.normf();
            return err*err;
        }

        template <typename opT>
        class do_err_box {
            const implT* impl;
            const opT* func;
            int npt;
            Tensor<double> qx;
            Tensor<double> quad_phit;
            Tensor<double> quad_phiw;
        public:
            do_err_box() {}

            do_err_box(const implT* impl, const opT* func, int npt, const Tensor<double>& qx,
                       const Tensor<double>& quad_phit, const Tensor<double>& quad_phiw)
                : impl(impl), func(func), npt(npt), qx(qx), quad_phit(quad_phit), quad_phiw(quad_phiw) {}

            do_err_box(const do_err_box& e)
                : impl(e.impl), func(e.func), npt(e.npt), qx(e.qx), quad_phit(e.quad_phit), quad_phiw(e.quad_phiw) {}

            double operator()(typename dcT::const_iterator& it) const {
                const keyT& key = it->first;
                const nodeT& node = it->second;
                if (node.has_coeff())
                    return impl->err_box(key, node, *func, npt, qx, quad_phit, quad_phiw);
                else
                    return 0.0;
            }

            double operator()(double a, double b) const {
                return a+b;
            }

            template <typename Archive>
            void serialize(const Archive& ar) {
                throw "not yet";
            }
        };

        /// Returns the sum of squares of errors from local info ... no comms
        template <typename opT>
        double errsq_local(const opT& func) const {
            PROFILE_MEMBER_FUNC(FunctionImpl);
            // Make quadrature rule of higher order
            const int npt = cdata.npt + 1;
            Tensor<double> qx, qw, quad_phi, quad_phiw, quad_phit;
            FunctionCommonData<T,NDIM>::_init_quadrature(k+1, npt, qx, qw, quad_phi, quad_phiw, quad_phit);

            typedef Range<typename dcT::const_iterator> rangeT;
            rangeT range(coeffs.begin(), coeffs.end());
            return world.taskq.reduce< double,rangeT,do_err_box<opT> >(range,
                                                                       do_err_box<opT>(this, &func, npt, qx, quad_phit, quad_phiw));
        }

        /// Returns \c int(f(x),x) in local volume
        T trace_local() const;

        struct do_norm2sq_local {
            double operator()(typename dcT::const_iterator& it) const {
                const nodeT& node = it->second;
                if (node.has_coeff()) {
                    double norm = node.coeff().normf();
                    return norm*norm;
                }
                else {
                    return 0.0;
                }
            }

            double operator()(double a, double b) const {
                return (a+b);
            }

            template <typename Archive> void serialize(const Archive& ar) {
                throw "NOT IMPLEMENTED";
            }
        };


        /// Returns the square of the local norm ... no comms
        double norm2sq_local() const;

        /// compute the inner product of this range with other
        template<typename R>
        struct do_inner_local {
            const FunctionImpl<R,NDIM>* other;
            bool leaves_only;
            typedef TENSOR_RESULT_TYPE(T,R) resultT;

            do_inner_local(const FunctionImpl<R,NDIM>* other, const bool leaves_only)
                : other(other), leaves_only(leaves_only) {}
            resultT operator()(typename dcT::const_iterator& it) const {

                TENSOR_RESULT_TYPE(T,R) sum=0.0;
                const keyT& key=it->first;
                const nodeT& fnode = it->second;
                if (fnode.has_coeff()) {
                    if (other->coeffs.probe(it->first)) {
                        const FunctionNode<R,NDIM>& gnode = other->coeffs.find(key).get()->second;
                        if (gnode.has_coeff()) {
                            if (gnode.coeff().dim(0) != fnode.coeff().dim(0)) {
                                madness::print("INNER", it->first, gnode.coeff().dim(0),fnode.coeff().dim(0));
                                MADNESS_EXCEPTION("functions have different k or compress/reconstruct error", 0);
                            }
                            if (leaves_only) {
                                if (gnode.is_leaf() or fnode.is_leaf()) {
                                    sum += fnode.coeff().trace_conj(gnode.coeff());
                                }
                            } else {
                                sum += fnode.coeff().trace_conj(gnode.coeff());
                            }
                        }
                    }
                }
                return sum;
            }

            resultT operator()(resultT a, resultT b) const {
                return (a+b);
            }

            template <typename Archive> void serialize(const Archive& ar) {
                throw "NOT IMPLEMENTED";
            }
        };

        /// Returns the inner product ASSUMING same distribution

        /// handles compressed and redundant form
        template <typename R>
        TENSOR_RESULT_TYPE(T,R) inner_local(const FunctionImpl<R,NDIM>& g) const {
            PROFILE_MEMBER_FUNC(FunctionImpl);
            typedef Range<typename dcT::const_iterator> rangeT;
            typedef TENSOR_RESULT_TYPE(T,R) resultT;

            // make sure the states of the trees are consistent
            MADNESS_ASSERT(this->is_redundant()==g.is_redundant());
            bool leaves_only=(this->is_redundant());
            return world.taskq.reduce<resultT,rangeT,do_inner_local<R> >
                (rangeT(coeffs.begin(),coeffs.end()),do_inner_local<R>(&g, leaves_only));
        }

        /// Type of the entry in the map returned by make_key_vec_map
        typedef std::vector< std::pair<int,const coeffT*> > mapvecT;

        /// Type of the map returned by make_key_vec_map
        typedef ConcurrentHashMap< keyT, mapvecT > mapT;

        /// Adds keys to union of local keys with specified index
        void add_keys_to_map(mapT* map, int index) const {
            typename dcT::const_iterator end = coeffs.end();
            for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
                typename mapT::accessor acc;
                const keyT& key = it->first;
                const FunctionNode<T,NDIM>& node = it->second;
                if (node.has_coeff()) {
                    map->insert(acc,key);
                    acc->second.push_back(std::make_pair(index,&(node.coeff())));
                }
            }
        }

        /// Returns map of union of local keys to vector of indexes of functions containing that key

        /// Local concurrency and synchronization only; no communication
        static
        mapT
        make_key_vec_map(const std::vector<const FunctionImpl<T,NDIM>*>& v) {
            mapT map(100000);
            // This loop must be parallelized
            for (unsigned int i=0; i<v.size(); i++) {
                //v[i]->add_keys_to_map(&map,i);
                v[i]->world.taskq.add(*(v[i]), &FunctionImpl<T,NDIM>::add_keys_to_map, &map, int(i));
            }
            if (v.size()) v[0]->world.taskq.fence();
            return map;
        }


        template <typename R>
        static void do_inner_localX(const typename mapT::iterator lstart,
                                    const typename mapT::iterator lend,
                                    typename FunctionImpl<R,NDIM>::mapT* rmap_ptr,
                                    const bool sym,
                                    Tensor< TENSOR_RESULT_TYPE(T,R) >* result_ptr,
                                    Mutex* mutex) {
            Tensor< TENSOR_RESULT_TYPE(T,R) >& result = *result_ptr;
            Tensor< TENSOR_RESULT_TYPE(T,R) > r(result.dim(0),result.dim(1));
            for (typename mapT::iterator lit=lstart; lit!=lend; ++lit) {
                const keyT& key = lit->first;
                typename FunctionImpl<R,NDIM>::mapT::iterator rit=rmap_ptr->find(key);
                if (rit != rmap_ptr->end()) {
                    const mapvecT& leftv = lit->second;
                    const typename FunctionImpl<R,NDIM>::mapvecT& rightv =rit->second;
                    const int nleft = leftv.size();
                    const int nright= rightv.size();

                    for (int iv=0; iv<nleft; iv++) {
                        const int i = leftv[iv].first;
                        const GenTensor<T>* iptr = leftv[iv].second;

                        for (int jv=0; jv<nright; jv++) {
                            const int j = rightv[jv].first;
                            const GenTensor<R>* jptr = rightv[jv].second;

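                            // for the symmetric case only the upper triangle (i<=j) is
                            // accumulated here; the lower triangle is filled in by the
                            // symmetrization at the end of inner_local() below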
                            if (!sym || (sym && i<=j))
                                r(i,j) += iptr->trace_conj(*jptr);
                        }
                    }
                }
            }
            mutex->lock();
            result += r;
            mutex->unlock();
        }

        static double conj(double x) {
            return x;
        }

        static double conj(float x) {
            return x;
        }

        static std::complex<double> conj(const std::complex<double> x) {
            return std::conj(x);
        }

        template <typename R>
        static Tensor< TENSOR_RESULT_TYPE(T,R) >
        inner_local(const std::vector<const FunctionImpl<T,NDIM>*>& left,
                    const std::vector<const FunctionImpl<R,NDIM>*>& right,
                    bool sym) {

            // This is basically a sparse matrix^T * matrix product
            // Rij = sum(k) Aki * Bkj
            // where i and j index functions and k index the wavelet coeffs
            // eventually the goal is this structure (don't have jtile yet)
            //
            // do in parallel tiles of k (tensors of coeffs)
            //    do tiles of j
            //       do i
            //          do j in jtile
            //             do k in ktile
            //                Rij += Aki*Bkj

            mapT lmap = make_key_vec_map(left);
            typename FunctionImpl<R,NDIM>::mapT rmap;
            typename FunctionImpl<R,NDIM>::mapT* rmap_ptr = (typename FunctionImpl<R,NDIM>::mapT*)(&lmap);
            if ((std::vector<const FunctionImpl<R,NDIM>*>*)(&left) != &right) {
                rmap = FunctionImpl<R,NDIM>::make_key_vec_map(right);
                rmap_ptr = &rmap;
            }

            size_t chunk = (lmap.size()-1)/(3*4*5)+1;
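            // split the union of keys into roughly 3*4*5=60 chunks; each chunk becomes
            // one task executing do_inner_localX below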

            Tensor< TENSOR_RESULT_TYPE(T,R) > r(left.size(), right.size());
            Mutex mutex;

            typename mapT::iterator lstart=lmap.begin();
            while (lstart != lmap.end()) {
                typename mapT::iterator lend = lstart;
                advance(lend,chunk);
                left[0]->world.taskq.add(&FunctionImpl<T,NDIM>::do_inner_localX<R>, lstart, lend, rmap_ptr, sym, &r, &mutex);
                lstart = lend;
            }
            left[0]->world.taskq.fence();

            if (sym) {
                for (long i=0; i<r.dim(0); i++) {
                    for (long j=0; j<i; j++) {
                        TENSOR_RESULT_TYPE(T,R) sum = r(i,j)+conj(r(j,i));
                        r(i,j) = sum;
                        r(j,i) = conj(sum);
                    }
                }
            }
            return r;
        }

        /// Return the inner product with an external function on a specified function node.
        /// @param[in] key Key of the function node to compute the inner product on. (the domain of integration)
        /// @param[in] c Tensor of coefficients for the function at the function node given by key
        /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
        /// @return Returns the inner product over the domain of a single function node, no guarantee of accuracy.
        T inner_ext_node(keyT key, tensorT c, const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f) const {
            tensorT fvals = tensorT(this->cdata.vk);
            // Compute the value of the external function at the quadrature points.
            fcube(key, *(f), cdata.quad_x, fvals);
            // Convert quadrature point values to scaling coefficients.
            tensorT fc = tensorT(values2coeffs(key, fvals));
            // Return the inner product of the two functions' scaling coefficients.
            return c.trace_conj(fc);
        }

        /// Call inner_ext_node recursively until convergence.
        /// @param[in] key Key of the function node on which to compute inner product (the domain of integration)
        /// @param[in] c coeffs for the function at the node given by key
        /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
        /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes
        /// @param[in] old_inner the inner product on the parent function node
        /// @return Returns the inner product over the domain of a single function, checks for convergence.
        T inner_ext_recursive(keyT key, tensorT c, const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, const bool leaf_refine, T old_inner=T(0)) const {
            int i = 0;
            tensorT c_child, inner_child;
            T new_inner, result = 0.0;

            c_child = tensorT(cdata.v2k); // tensor of child coeffs
            inner_child = Tensor<double>(pow(2, NDIM)); // child inner products

            // If old_inner is the default value, assume this is the first call
            // and compute the inner product on this node.
            if (old_inner == T(0)) {
                old_inner = inner_ext_node(key, c, f);
            }

            if (coeffs.find(key).get()->second.has_children()) {
                // Since the key has children and we know the function is redundant,
                // iterate over all children of this compute node, computing
                // the inner product on each child node. new_inner will store
                // the sum of these, yielding a more accurate inner product.
                for (KeyChildIterator<NDIM> it(key); it; ++it, ++i) {
                    const keyT& child = it.key();
                    tensorT cc = coeffs.find(child).get()->second.coeff().full_tensor_copy();
                    inner_child(i) = inner_ext_node(child, cc, f);
                }
                new_inner = inner_child.sum();
            } else if (leaf_refine) {
                // We need the scaling coefficients of the numerical function
                // at each of the children nodes. We can't use project because
                // there is no guarantee that the numerical function will have
                // a functor.  Instead, since we know we are at or below the
                // leaf nodes, the wavelet coefficients are zero (to within the
                // truncate tolerance). Thus, we can use unfilter() to
                // get the scaling coefficients at the next level.
                tensorT d = tensorT(cdata.v2k);
                d = T(0);
                d(cdata.s0) = copy(c);
                c_child = unfilter(d);

                // Iterate over all children of this compute node, computing
                // the inner product on each child node. new_inner will store
                // the sum of these, yielding a more accurate inner product.
                for (KeyChildIterator<NDIM> it(key); it; ++it, ++i) {
                    const keyT& child = it.key();
                    tensorT cc = tensorT(c_child(child_patch(child)));
                    inner_child(i) = inner_ext_node(child, cc, f);
                }
                new_inner = inner_child.sum();
            } else {
                // If we get to here, we are at the leaf nodes and the user has
                // specified that they do not want refinement past leaf nodes.
                new_inner = old_inner;
            }

            // Check for convergence. If converged...yay, we're done. If not,
            // call inner_ext_recursive on each child node and accumulate
            // the inner product in result.
            // if (std::abs(new_inner - old_inner) <= truncate_tol(thresh, key)) {
            if (std::abs(new_inner - old_inner) <= thresh) {
                result = new_inner;
            } else {
                i = 0;
                for (KeyChildIterator<NDIM> it(key); it; ++it, ++i) {
                    const keyT& child = it.key();
                    tensorT cc = tensorT(c_child(child_patch(child)));
                    result += inner_ext_recursive(child, cc, f, leaf_refine, inner_child(i));
                }
            }

            return result;
        }

        struct do_inner_ext_local_ffi {
            const std::shared_ptr< FunctionFunctorInterface<T, NDIM> > fref;
            const implT * impl;
            const bool leaf_refine;
            const bool do_leaves;   ///< start with leaf nodes instead of initial_level

            do_inner_ext_local_ffi(const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f,
                    const implT * impl, const bool leaf_refine, const bool do_leaves)
                    : fref(f), impl(impl), leaf_refine(leaf_refine), do_leaves(do_leaves) {};

            T operator()(typename dcT::const_iterator& it) const {
                if (do_leaves and it->second.is_leaf()) {
                    tensorT cc = it->second.coeff().full_tensor();
                    return impl->inner_adaptive_recursive(it->first, cc, fref, leaf_refine, T(0));
                } else if ((not do_leaves) and (it->first.level() == impl->initial_level)) {
                    tensorT cc = it->second.coeff().full_tensor();
                    return impl->inner_ext_recursive(it->first, cc, fref, leaf_refine, T(0));
                } else {
                    return 0.0;
                }
            }

            T operator()(T a, T b) const {
                return (a + b);
            }

            template <typename Archive> void serialize(const Archive& ar) {
                throw "NOT IMPLEMENTED";
            }
        };

        /// Return the local part of inner product with external function ... no communication.
        /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
        /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes
        /// @return Returns local part of the inner product, i.e. over the domain of all function nodes on this compute node.
        T inner_ext_local(const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, const bool leaf_refine) const {
            typedef Range<typename dcT::const_iterator> rangeT;

            return world.taskq.reduce<T, rangeT, do_inner_ext_local_ffi>(rangeT(coeffs.begin(),coeffs.end()),
                    do_inner_ext_local_ffi(f, this, leaf_refine, false));
        }

        /// Return the local part of inner product with external function ... no communication.
        /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
        /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes
        /// @return Returns local part of the inner product, i.e. over the domain of all function nodes on this compute node.
        T inner_adaptive_local(const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, const bool leaf_refine) const {
            typedef Range<typename dcT::const_iterator> rangeT;

            return world.taskq.reduce<T, rangeT, do_inner_ext_local_ffi>(rangeT(coeffs.begin(),coeffs.end()),
                    do_inner_ext_local_ffi(f, this, leaf_refine, true));
        }

        /// Call inner_ext_node recursively until convergence.
        /// @param[in] key Key of the function node on which to compute inner product (the domain of integration)
        /// @param[in] c coeffs for the function at the node given by key
        /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
        /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes
        /// @param[in] old_inner the inner product on the parent function node
        /// @return Returns the inner product over the domain of a single function, checks for convergence.
        T inner_adaptive_recursive(keyT key, const tensorT& c,
                const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f,
                const bool leaf_refine, T old_inner=T(0)) const {

            // the inner product in the current node
            old_inner = inner_ext_node(key, c, f);
            T result=0.0;

            // the inner product in the child nodes

            // compute the sum coefficients of the MRA function
            tensorT d = tensorT(cdata.v2k);
            d = T(0);
            d(cdata.s0) = copy(c);
            tensorT c_child = unfilter(d);

            // compute the inner product in the child nodes
            T new_inner=0.0; // child inner products
            for (KeyChildIterator<NDIM> it(key); it; ++it) {
                const keyT& child = it.key();
                tensorT cc = tensorT(c_child(child_patch(child)));
                new_inner+= inner_ext_node(child, cc, f);
            }

            // continue recursion if needed
            const double tol=truncate_tol(thresh,key);
            if (leaf_refine and (std::abs(new_inner - old_inner) > tol)) {
                for (KeyChildIterator<NDIM> it(key); it; ++it) {
                    const keyT& child = it.key();
                    tensorT cc = tensorT(c_child(child_patch(child)));
                    result += inner_adaptive_recursive(child, cc, f, leaf_refine, T(0));
                }
            } else {
                result = new_inner;
            }
            return result;

        }


        /// Return the gaxpy product with an external function on a specified
        /// function node.
        /// @param[in] key Key of the function node on which to compute gaxpy
        /// @param[in] lc Tensor of coefficients for the function at the
        ///            function node given by key
        /// @param[in] f Pointer to function of type T that takes coordT
        ///            arguments. This is the externally provided function and
        ///            the right argument of gaxpy.
        /// @param[in] alpha prefactor of lc for gaxpy
        /// @param[in] beta prefactor of fcoeffs for gaxpy
        /// @return Returns coefficient tensor of the gaxpy product at specified
        ///         key, no guarantee of accuracy.
        template <typename L>
        tensorT gaxpy_ext_node(keyT key, Tensor<L> lc, T (*f)(const coordT&), T alpha, T beta) const {
            // Compute the value of the external function at the quadrature points.
            tensorT fvals = madness::fcube(key, f, cdata.quad_x);
            // Convert quadrature point values to scaling coefficients.
            tensorT fcoeffs = values2coeffs(key, fvals);
            // Return the gaxpy combination of the two functions' scaling coeffs.
            tensorT c2 = copy(lc);
            c2.gaxpy(alpha, fcoeffs, beta);
            return c2;
        }

        /// Return out of place gaxpy using recursive descent.
        /// @param[in] key Key of the function node on which to compute gaxpy
        /// @param[in] left FunctionImpl, left argument of gaxpy
        /// @param[in] lcin coefficients of left at this node
        /// @param[in] c coefficients of gaxpy product at this node
        /// @param[in] f pointer to function of type T that takes coordT
        ///            arguments. This is the externally provided function and
        ///            the right argument of gaxpy.
        /// @param[in] alpha prefactor of left argument for gaxpy
        /// @param[in] beta prefactor of right argument for gaxpy
        /// @param[in] tol convergence tolerance...when the norm of the gaxpy's
        ///            difference coefficients is less than tol, we are done.
        template <typename L>
        void gaxpy_ext_recursive(const keyT& key, const FunctionImpl<L,NDIM>* left,
                                 Tensor<L> lcin, tensorT c, T (*f)(const coordT&),
                                 T alpha, T beta, double tol, bool below_leaf) {
            typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT;

            // If we haven't yet reached the leaf level, check whether the
            // current key is a leaf node of left. If so, set below_leaf to true
            // and continue. If not, make this a parent, recur down, return.
            if (not below_leaf) {
                bool left_leaf = left->coeffs.find(key).get()->second.is_leaf();
                if (left_leaf) {
                    below_leaf = true;
                } else {
                    this->coeffs.replace(key, nodeT(coeffT(), true));
                    for (KeyChildIterator<NDIM> it(key); it; ++it) {
                        const keyT& child = it.key();
                        woT::task(left->coeffs.owner(child), &implT:: template gaxpy_ext_recursive<L>,
                                  child, left, Tensor<L>(), tensorT(), f, alpha, beta, tol, below_leaf);
                    }
                    return;
                }
            }

            // Compute left's coefficients if not provided
            Tensor<L> lc = lcin;
            if (lc.size() == 0) {
                literT it = left->coeffs.find(key).get();
                MADNESS_ASSERT(it != left->coeffs.end());
                if (it->second.has_coeff())
                    lc = it->second.coeff().full_tensor_copy();
            }

            // Compute this node's coefficients if not provided in function call
            if (c.size() == 0) {
                c = gaxpy_ext_node(key, lc, f, alpha, beta);
            }

            // We need the scaling coefficients of the numerical function at
            // each of the children nodes. We can't use project because there
            // is no guarantee that the numerical function will have a functor.
            // Instead, since we know we are at or below the leaf nodes, the
            // wavelet coefficients are zero (to within the truncate tolerance).
            // Thus, we can use unfilter() to get the scaling coefficients at
            // the next level.
            Tensor<L> lc_child = Tensor<L>(cdata.v2k); // left's child coeffs
            Tensor<L> ld = Tensor<L>(cdata.v2k);
            ld = L(0);
            ld(cdata.s0) = copy(lc);
            lc_child = unfilter(ld);

            // Iterate over children of this node,
            // storing the gaxpy coeffs in c_child
            tensorT c_child = tensorT(cdata.v2k); // tensor of child coeffs
            for (KeyChildIterator<NDIM> it(key); it; ++it) {
                const keyT& child = it.key();
                tensorT lcoeff = tensorT(lc_child(child_patch(child)));
                c_child(child_patch(child)) = gaxpy_ext_node(child, lcoeff, f, alpha, beta);
            }

            // Compute the difference coefficients to test for convergence.
            tensorT d = tensorT(cdata.v2k);
            d = filter(c_child);
            // Filter returns both s and d coefficients, so set scaling
            // coefficient part of d to 0 so that we take only the
            // norm of the difference coefficients.
            d(cdata.s0) = T(0);
            double dnorm = d.normf();

            // Small d.normf means we've reached a good level of resolution
            // Store the coefficients and return.
            if (dnorm <= truncate_tol(tol,key)) {
                this->coeffs.replace(key, nodeT(coeffT(c,targs), false));
            } else {
                // Otherwise, make this a parent node and recur down
                this->coeffs.replace(key, nodeT(coeffT(), true)); // Interior node

                for (KeyChildIterator<NDIM> it(key); it; ++it) {
                    const keyT& child = it.key();
                    tensorT child_coeff = tensorT(c_child(child_patch(child)));
                    tensorT left_coeff = tensorT(lc_child(child_patch(child)));
                    woT::task(left->coeffs.owner(child), &implT:: template gaxpy_ext_recursive<L>,
                              child, left, left_coeff, child_coeff, f, alpha, beta, tol, below_leaf);
                }
            }
        }

        template <typename L>
        void gaxpy_ext(const FunctionImpl<L,NDIM>* left, T (*f)(const coordT&), T alpha, T beta, double tol, bool fence) {
            if (world.rank() == coeffs.owner(cdata.key0))
                gaxpy_ext_recursive<L> (cdata.key0, left, Tensor<L>(), tensorT(), f, alpha, beta, tol, false);
            if (fence)
                world.gop.fence();
        }

        /// project the low-dim function g on the hi-dim function f: result(x) = <this(x,y) | g(y)>

        /// invoked by the hi-dim function, a function of NDIM+LDIM

        /// Upon return, result matches this, with contributions on all scales
        /// @param[in]  result  lo-dim function of NDIM-LDIM \todo Should this be param[out]?
        /// @param[in]  gimpl   lo-dim function of LDIM
        /// @param[in]  dim     the dimensions over which to integrate: 0..LDIM or LDIM..LDIM+NDIM-1
        template<size_t LDIM>
        void project_out(FunctionImpl<T,NDIM-LDIM>* result, const FunctionImpl<T,LDIM>* gimpl,
                         const int dim, const bool fence) {

            const keyT& key0=cdata.key0;

            if (world.rank() == coeffs.owner(key0)) {

                // coeff_op will accumulate the result
                typedef project_out_op<LDIM> coeff_opT;
                coeff_opT coeff_op(this,result,CoeffTracker<T,LDIM>(gimpl),dim);

                // don't do anything on this -- coeff_op will accumulate into result
                typedef noop<T,NDIM> apply_opT;
                apply_opT apply_op;

                woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>,
                          coeff_op, apply_op, cdata.key0);

            }
            if (fence) world.gop.fence();

        }


        /// project the low-dim function g on the hi-dim function f: result(x) = <f(x,y) | g(y)>
        template<size_t LDIM>
        struct project_out_op {
            bool randomize() const {return false;}

            typedef project_out_op<LDIM> this_type;
            typedef CoeffTracker<T,LDIM> ctL;
            typedef FunctionImpl<T,NDIM-LDIM> implL1;
            typedef std::pair<bool,coeffT> argT;

            const implT* fimpl;     ///< the hi dim function f
            mutable implL1* result; ///< the low dim result function
            ctL iag;                ///< the low dim function g
            int dim;                ///< 0: project 0..LDIM-1, 1: project LDIM..NDIM-1

            // ctor
            project_out_op() {}
            project_out_op(const implT* fimpl, implL1* result, const ctL& iag, const int dim)
                : fimpl(fimpl), result(result), iag(iag), dim(dim) {}
            project_out_op(const project_out_op& other)
                : fimpl(other.fimpl), result(other.result), iag(other.iag), dim(other.dim) {}


            /// do the actual contraction
            Future<argT> operator()(const Key<NDIM>& key) const {

                Key<LDIM> key1,key2,dest;
                key.break_apart(key1,key2);

                // make the right coefficients
                coeffT gcoeff;
                if (dim==0) {
                    gcoeff=iag.get_impl()->parent_to_child(iag.coeff(),iag.key(),key1);
                    dest=key2;
                }
                if (dim==1) {
                    gcoeff=iag.get_impl()->parent_to_child(iag.coeff(),iag.key(),key2);
                    dest=key1;
                }

                MADNESS_ASSERT(fimpl->get_coeffs().probe(key));     // must be local!
                const nodeT& fnode=fimpl->get_coeffs().find(key).get()->second;
                const coeffT& fcoeff=fnode.coeff();

                // fast return if possible
                if (fcoeff.has_no_data() or gcoeff.has_no_data())
                    return Future<argT> (argT(fnode.is_leaf(),coeffT()));

                // let's specialize for the time being on SVD tensors for f and full tensors of half dim for g
                MADNESS_ASSERT(gcoeff.tensor_type()==TT_FULL);
                MADNESS_ASSERT(fcoeff.tensor_type()==TT_2D);
                const tensorT gtensor=gcoeff.full_tensor();
                tensorT final(result->cdata.vk);

                const int otherdim=(dim+1)%2;
                const int k=fcoeff.dim(0);
                std::vector<Slice> s(fcoeff.config().dim_per_vector()+1,_);

                // do the actual contraction
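                // for each SVD term r: contract the vector belonging to the integrated
                // particle (dim) with gtensor, then add the resulting overlap times the
                // SVD weight times the other particle's vector to the final coefficients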
5520                 for (int r=0; r<fcoeff.rank(); ++r) {
5521                     s[0]=Slice(r,r);
5522                     const tensorT contracted_tensor=fcoeff.config().ref_vector(dim)(s).reshape(k,k,k);
5523                     const tensorT other_tensor=fcoeff.config().ref_vector(otherdim)(s).reshape(k,k,k);
5524                     const double ovlp= gtensor.trace_conj(contracted_tensor);
5525                     const double fac=ovlp * fcoeff.config().weights(r);
5526                     final+=fac*other_tensor;
5527                 }
5528 
5529                 // accumulate the result
5530                 result->coeffs.task(dest, &FunctionNode<T,LDIM>::accumulate2, final, result->coeffs, dest, TaskAttributes::hipri());
5531 
5532                 return Future<argT> (argT(fnode.is_leaf(),coeffT()));
5533             }
5534 
make_childproject_out_op5535             this_type make_child(const keyT& child) const {
5536             	Key<LDIM> key1,key2;
5537             	child.break_apart(key1,key2);
5538             	const Key<LDIM> gkey = (dim==0) ? key1 : key2;
5539 
5540                 return this_type(fimpl,result,iag.make_child(gkey),dim);
5541             }
5542 
5543             /// retrieve the coefficients (parent coeffs might be remote)
activateproject_out_op5544             Future<this_type> activate() const {
5545             	Future<ctL> g1=iag.activate();
5546                 return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
5547                                                &this_type::forward_ctor),fimpl,result,g1,dim);
5548             }
5549 
5550             /// taskq-compatible ctor
forward_ctorproject_out_op5551             this_type forward_ctor(const implT* fimpl1, implL1* result1, const ctL& iag1, const int dim1) {
5552             	return this_type(fimpl1,result1,iag1,dim1);
5553             }
5554 
serializeproject_out_op5555             template <typename Archive> void serialize(const Archive& ar) {
5556                 ar & result & iag & fimpl & dim;
5557             }
5558 
5559         };


        /// project the low-dim function g on the hi-dim function f: this(x) = <f(x,y) | g(y)>

        /// invoked by result, a function of NDIM

        /// @param[in]  f   hi-dim function of LDIM+NDIM
        /// @param[in]  g   lo-dim function of LDIM
        /// @param[in]  dim which dimensions to integrate over: 0..LDIM-1 or LDIM..LDIM+NDIM-1
        template<size_t LDIM>
        void project_out2(const FunctionImpl<T,LDIM+NDIM>* f, const FunctionImpl<T,LDIM>* g, const int dim) {

            typedef std::pair< keyT,coeffT > pairT;
            typedef typename FunctionImpl<T,NDIM+LDIM>::dcT::const_iterator fiterator;

            // loop over all nodes of hi-dim f, compute the inner products with all
            // appropriate nodes of g, and accumulate in result
            fiterator end = f->get_coeffs().end();
            for (fiterator it=f->get_coeffs().begin(); it!=end; ++it) {
                const Key<LDIM+NDIM> key=it->first;
                const FunctionNode<T,LDIM+NDIM>& fnode=it->second;
                const coeffT& fcoeff=fnode.coeff();

                if (fnode.is_leaf() and fcoeff.has_data()) {

                    // break key into particles: over key1 will be summed, over key2 will be
                    // accumulated, or vice versa, depending on dim
                    if (dim==0) {
                        Key<NDIM> key1;
                        Key<LDIM> key2;
                        key.break_apart(key1,key2);

                        Future<pairT> result;
                        //                        sock_it_to_me(key1, result.remote_ref(world));
                        g->task(coeffs.owner(key1), &implT::sock_it_to_me, key1, result.remote_ref(world), TaskAttributes::hipri());
                        woT::task(world.rank(),&implT:: template do_project_out<LDIM>,fcoeff,result,key1,key2,dim);

                    } else if (dim==1) {
                        Key<LDIM> key1;
                        Key<NDIM> key2;
                        key.break_apart(key1,key2);

                        Future<pairT> result;
                        //                        sock_it_to_me(key2, result.remote_ref(world));
                        g->task(coeffs.owner(key2), &implT::sock_it_to_me, key2, result.remote_ref(world), TaskAttributes::hipri());
                        woT::task(world.rank(),&implT:: template do_project_out<LDIM>,fcoeff,result,key2,key1,dim);

                    } else {
                        MADNESS_EXCEPTION("confused dim in project_out",1);
                    }
                }
            }
            this->compressed=false;
            this->nonstandard=false;
            this->redundant=true;
        }
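        // Note: project_out2 is the node-local variant of the traversal-based scheme
        // above -- it loops over the locally held leaf nodes of the hi-dim f, fetches
        // the matching low-dim coefficients (sock_it_to_me), and delegates the actual
        // contraction to do_project_out, which accumulates into this function's tree.
        // The flags set at the end of the routine leave the result in redundant
        // (reconstructed) form.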


        /// compute the inner product of two nodes of only some dimensions and accumulate on result

        /// invoked by result
        /// @param[in]  fcoeff  coefficients of high dimension LDIM+NDIM
        /// @param[in]  gpair   key and coeffs of low dimension LDIM (possibly a parent node)
        /// @param[in]  gkey    key of actual low dim node (possibly the same as gpair.first, iff gnode exists)
        /// @param[in]  dest    destination node for the result
        /// @param[in]  dim     which dimensions should be contracted: 0..LDIM-1 or LDIM..NDIM+LDIM-1
        template<size_t LDIM>
        void do_project_out(const coeffT& fcoeff, const std::pair<keyT,coeffT> gpair, const keyT& gkey,
                            const Key<NDIM>& dest, const int dim) const {

            const coeffT gcoeff=parent_to_child(gpair.second,gpair.first,gkey);

            // fast return if possible
            if (fcoeff.has_no_data() or gcoeff.has_no_data()) return;

            // let's specialize for the time being on SVD tensors for f and full tensors of half dim for g
            MADNESS_ASSERT(gcoeff.tensor_type()==TT_FULL);
            MADNESS_ASSERT(fcoeff.tensor_type()==TT_2D);
            const tensorT gtensor=gcoeff.full_tensor();
            tensorT result(cdata.vk);

            const int otherdim=(dim+1)%2;
            const int k=fcoeff.dim(0);
            std::vector<Slice> s(fcoeff.config().dim_per_vector()+1,_);

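            // Rank-wise contraction over the SVD representation of fcoeff, analogous to
            // project_out_op::operator() above:
            //   result = sum_r weights(r) * <g | u_r^{(dim)}> * u_r^{(otherdim)}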
            // do the actual contraction
            for (int r=0; r<fcoeff.rank(); ++r) {
                s[0]=Slice(r,r);
                const tensorT contracted_tensor=fcoeff.config().ref_vector(dim)(s).reshape(k,k,k);
                const tensorT other_tensor=fcoeff.config().ref_vector(otherdim)(s).reshape(k,k,k);
                const double ovlp= gtensor.trace_conj(contracted_tensor);
                const double fac=ovlp * fcoeff.config().weights(r);
                result+=fac*other_tensor;
            }

            // accumulate the result
            coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest, TaskAttributes::hipri());
        }




        /// Returns the maximum local depth of the tree ... no communications.
        std::size_t max_local_depth() const;


        /// Returns the maximum depth of the tree ... collective ... global sum/broadcast
        std::size_t max_depth() const;

        /// Returns the max number of nodes on a processor
        std::size_t max_nodes() const;

        /// Returns the min number of nodes on a processor
        std::size_t min_nodes() const;

        /// Returns the size of the tree structure of the function ... collective global sum
        std::size_t tree_size() const;

        /// Returns the number of coefficients in the function ... collective global sum
        std::size_t size() const;

        /// Returns the number of coefficients in the function ... collective global sum
        std::size_t real_size() const;

        /// print tree size and size
        void print_size(const std::string name) const;

        /// print the number of configurations per node
        void print_stats() const;

        /// In-place scale by a constant
        void scale_inplace(const T q, bool fence);

        /// Out-of-place scale by a constant
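        /// (*this)[key] = q * f[key] for every node of f, copying f's tree structure
        /// @param[in]  q       the scaling factor
        /// @param[in]  f       the source function
        /// @param[in]  fence   if true, fence the world after the local replacement loop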
        template <typename Q, typename F>
        void scale_oop(const Q q, const FunctionImpl<F,NDIM>& f, bool fence) {
            typedef typename FunctionImpl<F,NDIM>::nodeT fnodeT;
            typedef typename FunctionImpl<F,NDIM>::dcT fdcT;
            typename fdcT::const_iterator end = f.coeffs.end();
            for (typename fdcT::const_iterator it=f.coeffs.begin(); it!=end; ++it) {
                const keyT& key = it->first;
                const fnodeT& node = it->second;

                if (node.has_coeff()) {
                    coeffs.replace(key,nodeT(node.coeff()*q,node.has_children()));
                }
                else {
                    coeffs.replace(key,nodeT(coeffT(),node.has_children()));
                }
            }
            if (fence)
                world.gop.fence();
        }
    };

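    // Serialization of FunctionImpl pointers: only a null-flag and the WorldObject id
    // are stored; on load the id is resolved back to the already-existing local
    // instance via World::world_from_id / World::ptr_from_id. The shared_ptr
    // specializations wrap the raw pointer with a no-op deleter, since the archive
    // never owns the object.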
    namespace archive {
        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveLoadImpl<Archive,const FunctionImpl<T,NDIM>*> {
            static void load(const Archive& ar, const FunctionImpl<T,NDIM>*& ptr) {
                bool exists=false;
                ar & exists;
                if (exists) {
                    uniqueidT id;
                    ar & id;
                    World* world = World::world_from_id(id.get_world_id());
                    MADNESS_ASSERT(world);
                    ptr = static_cast< const FunctionImpl<T,NDIM>*>(world->ptr_from_id< WorldObject< FunctionImpl<T,NDIM> > >(id));
                    if (!ptr)
                        MADNESS_EXCEPTION("FunctionImpl: remote operation attempting to use a locally uninitialized object",0);
                } else {
                    ptr=nullptr;
                }
            }
        };

        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveStoreImpl<Archive,const FunctionImpl<T,NDIM>*> {
            static void store(const Archive& ar, const FunctionImpl<T,NDIM>*const& ptr) {
                bool exists=(ptr) ? true : false;
                ar & exists;
                if (exists) ar & ptr->id();
            }
        };

        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveLoadImpl<Archive, FunctionImpl<T,NDIM>*> {
            static void load(const Archive& ar, FunctionImpl<T,NDIM>*& ptr) {
                bool exists=false;
                ar & exists;
                if (exists) {
                    uniqueidT id;
                    ar & id;
                    World* world = World::world_from_id(id.get_world_id());
                    MADNESS_ASSERT(world);
                    ptr = static_cast< FunctionImpl<T,NDIM>*>(world->ptr_from_id< WorldObject< FunctionImpl<T,NDIM> > >(id));
                    if (!ptr)
                        MADNESS_EXCEPTION("FunctionImpl: remote operation attempting to use a locally uninitialized object",0);
                } else {
                    ptr=nullptr;
                }
            }
        };

        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveStoreImpl<Archive, FunctionImpl<T,NDIM>*> {
            static void store(const Archive& ar, FunctionImpl<T,NDIM>*const& ptr) {
                bool exists=(ptr) ? true : false;
                ar & exists;
                if (exists) ar & ptr->id();
            }
        };

        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveLoadImpl<Archive, std::shared_ptr<const FunctionImpl<T,NDIM> > > {
            static void load(const Archive& ar, std::shared_ptr<const FunctionImpl<T,NDIM> >& ptr) {
                const FunctionImpl<T,NDIM>* f = nullptr;
                ArchiveLoadImpl<Archive, const FunctionImpl<T,NDIM>*>::load(ar, f);
                ptr.reset(f, [] (const FunctionImpl<T,NDIM> *p_) -> void {});
            }
        };

        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveStoreImpl<Archive, std::shared_ptr<const FunctionImpl<T,NDIM> > > {
            static void store(const Archive& ar, const std::shared_ptr<const FunctionImpl<T,NDIM> >& ptr) {
                ArchiveStoreImpl<Archive, const FunctionImpl<T,NDIM>*>::store(ar, ptr.get());
            }
        };

        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveLoadImpl<Archive, std::shared_ptr<FunctionImpl<T,NDIM> > > {
            static void load(const Archive& ar, std::shared_ptr<FunctionImpl<T,NDIM> >& ptr) {
                FunctionImpl<T,NDIM>* f = nullptr;
                ArchiveLoadImpl<Archive, FunctionImpl<T,NDIM>*>::load(ar, f);
                ptr.reset(f, [] (FunctionImpl<T,NDIM> *p_) -> void {});
            }
        };

        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveStoreImpl<Archive, std::shared_ptr<FunctionImpl<T,NDIM> > > {
            static void store(const Archive& ar, const std::shared_ptr<FunctionImpl<T,NDIM> >& ptr) {
                ArchiveStoreImpl<Archive, FunctionImpl<T,NDIM>*>::store(ar, ptr.get());
            }
        };
    }

}

#endif // MADNESS_MRA_FUNCIMPL_H__INCLUDED