1 /*
2   This file is part of MADNESS.
3 
4   Copyright (C) 2007,2010 Oak Ridge National Laboratory
5 
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 2 of the License, or
9   (at your option) any later version.
10 
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14   GNU General Public License for more details.
15 
16   You should have received a copy of the GNU General Public License
17   along with this program; if not, write to the Free Software
18   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 
20   For more information please contact:
21 
22   Robert J. Harrison
23   Oak Ridge National Laboratory
24   One Bethel Valley Road
25   P.O. Box 2008, MS-6367
26 
27   email: harrisonrj@ornl.gov
28   tel:   865-241-3937
29   fax:   865-572-0680
30 */
31 
32 #ifndef MADNESS_MRA_MRAIMPL_H__INCLUDED
33 #define MADNESS_MRA_MRAIMPL_H__INCLUDED
34 
35 #ifndef MPRAIMPLX
36 #error "mraimpl.h should ONLY be included in one of the mraX.cc files (x=1..6)"
37 #endif
38 
39 //#define WORLD_INSTANTIATE_STATIC_TEMPLATES
40 #include <memory>
41 #include <math.h>
42 #include <cmath>
43 #include <madness/world/world_object.h>
44 #include <madness/world/worlddc.h>
45 #include <madness/world/worldhashmap.h>
46 #include <madness/mra/function_common_data.h>
47 
48 #include <madness/mra/funcimpl.h>
49 #include <madness/mra/displacements.h>
50 
51 namespace std {
52     template <typename T>
isnan(const std::complex<T> & v)53     bool isnan(const std::complex<T>& v) {
54         return ::std::isnan(v.real()) || ::std::isnan(v.imag());
55     }
56 }
57 
58 /// \file mra/mraimpl.h
59 /// \brief Declaration and initialization of static data, some implementation, some instantiation
60 
61 namespace madness {
62 
63     // Definition and initialization of FunctionDefaults static members
64     // It cannot be an instance of FunctionFactory since we want to
65     // set the defaults independent of the data type.
66 
67     template <typename T, std::size_t NDIM>
_init_twoscale()68     void FunctionCommonData<T,NDIM>::_init_twoscale() {
69         if (! two_scale_hg(k, &hg)) throw "failed to get twoscale coefficients";
70         hgT = copy(transpose(hg));
71 
72         Slice sk(0,k-1), sk2(k,-1);
73         hgsonly = copy(hg(Slice(0,k-1),_));
74 
75         h0 = copy(hg(sk,sk));
76         h1 = copy(hg(sk,sk2));
77         g0 = copy(hg(sk2,sk));
78         g1 = copy(hg(sk2,sk2));
79 
80         h0T = copy(transpose(hg(sk,sk)));
81         h1T = copy(transpose(hg(sk,sk2)));
82         g0T = copy(transpose(hg(sk2,sk)));
83         g1T = copy(transpose(hg(sk2,sk2)));
84 
85     }
86 
87     template <typename T, std::size_t NDIM>
_init_quadrature(int k,int npt,Tensor<double> & quad_x,Tensor<double> & quad_w,Tensor<double> & quad_phi,Tensor<double> & quad_phiw,Tensor<double> & quad_phit)88     void FunctionCommonData<T,NDIM>::_init_quadrature
89     (int k, int npt, Tensor<double>& quad_x, Tensor<double>& quad_w,
90      Tensor<double>& quad_phi, Tensor<double>& quad_phiw, Tensor<double>& quad_phit) {
91         quad_x = Tensor<double>(npt); // point
92         quad_w = Tensor<double>(npt); // wheight
93         quad_phi = Tensor<double>(npt,k);
94         quad_phiw = Tensor<double>(npt,k);
95 
96         gauss_legendre(npt,0.0,1.0,quad_x.ptr(),quad_w.ptr());
97         for (int mu=0; mu<npt; ++mu) {
98             double phi[200];
99             legendre_scaling_functions(quad_x(mu),k,phi);
100             for (int j=0; j<k; ++j) {
101                 quad_phi(mu,j) = phi[j];
102                 quad_phiw(mu,j) = quad_w(mu)*phi[j];
103             }
104         }
105         quad_phit = transpose(quad_phi);
106     }
107 
108 
109     template <typename T, std::size_t NDIM>
verify_tree()110     void FunctionImpl<T,NDIM>::verify_tree() const {
111         PROFILE_MEMBER_FUNC(FunctionImpl);
112         world.gop.fence();  // Make sure nothing is going on
113 
114         // Verify consistency of compression status, existence and size of coefficients,
115         // and has_children() flag.
116         for (typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
117             const keyT& key = it->first;
118             const nodeT& node = it->second;
119             bool bad;
120 
121             if (is_compressed()) {
122                 if (node.has_children()) {
123                     bad = (node.coeff().has_data()) and (node.coeff().dim(0) != 2*cdata.k);
124                 }
125                 else {
126                     //                    bad = node.coeff().size() != 0;
127                     bad = node.coeff().has_data();
128                 }
129             }
130             else {
131                 if (node.has_children()) {
132                     //                    bad = node.coeff().size() != 0;
133                     bad = node.coeff().has_data();
134                 }
135                 else {
136                     bad = (node.coeff().has_data()) and ( node.coeff().dim(0) != cdata.k);
137                 }
138             }
139 
140             if (bad) {
141                 print(world.rank(), "FunctionImpl: verify: INCONSISTENT TREE NODE, key =", key, ", node =", node,
142                       ", dim[0] =",node.coeff().dim(0),", compressed =",is_compressed());
143                 std::cout.flush();
144                 MADNESS_EXCEPTION("FunctionImpl: verify: INCONSISTENT TREE NODE", 0);
145             }
146         }
147 
148         // Ensure that parents and children exist appropriately
149         for (typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
150             const keyT& key = it->first;
151             const nodeT& node = it->second;
152 
153             if (key.level() > 0) {
154                 const keyT parent = key.parent();
155                 typename dcT::const_iterator pit = coeffs.find(parent).get();
156                 if (pit == coeffs.end()) {
157                     print(world.rank(), "FunctionImpl: verify: MISSING PARENT",key,parent);
158                     std::cout.flush();
159                     MADNESS_EXCEPTION("FunctionImpl: verify: MISSING PARENT", 0);
160                 }
161                 const nodeT& pnode = pit->second;
162                 if (!pnode.has_children()) {
163                     print(world.rank(), "FunctionImpl: verify: PARENT THINKS IT HAS NO CHILDREN",key,parent);
164                     std::cout.flush();
165                     MADNESS_EXCEPTION("FunctionImpl: verify: PARENT THINKS IT HAS NO CHILDREN", 0);
166                 }
167             }
168 
169             for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
170                 typename dcT::const_iterator cit = coeffs.find(kit.key()).get();
171                 if (cit == coeffs.end()) {
172                     if (node.has_children()) {
173                         print(world.rank(), "FunctionImpl: verify: MISSING CHILD",key,kit.key());
174                         std::cout.flush();
175                         MADNESS_EXCEPTION("FunctionImpl: verify: MISSING CHILD", 0);
176                     }
177                 }
178                 else {
179                     if (! node.has_children()) {
180                         print(world.rank(), "FunctionImpl: verify: UNEXPECTED CHILD",key,kit.key());
181                         std::cout.flush();
182                         MADNESS_EXCEPTION("FunctionImpl: verify: UNEXPECTED CHILD", 0);
183                     }
184                 }
185             }
186         }
187 
188         world.gop.fence();
189     }
190 
191 
192     template <typename T, std::size_t NDIM>
get_pmap()193     const std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >& FunctionImpl<T,NDIM>::get_pmap() const {
194         return coeffs.get_pmap();
195     }
196 
197 
198     /// perform: this= alpha*f + beta*g, invoked by result
199 
200     /// f and g are reconstructed, so we can save on the compress operation,
201     /// walk down the joint tree, and add leaf coefficients; effectively refines
202     /// to common finest level.
203     /// @param[in]  alpha   prefactor for f
204     /// @param[in]  f       first addend
205     /// @param[in]  beta    prefactor for g
206     /// @param[in]  g       second addend
207     /// @return     nothing, but leaves this's tree reconstructed and as sum of f and g
208     template <typename T, std::size_t NDIM>
gaxpy_oop_reconstructed(const double alpha,const implT & f,const double beta,const implT & g,const bool fence)209     void FunctionImpl<T,NDIM>::gaxpy_oop_reconstructed(const double alpha, const implT& f,
210                                                        const double beta, const implT& g, const bool fence) {
211 
212         MADNESS_ASSERT(not f.is_compressed());
213         MADNESS_ASSERT(not g.is_compressed());
214 
215         ProcessID owner = coeffs.owner(cdata.key0);
216         if (world.rank() == owner) {
217 
218             CoeffTracker<T,NDIM> ff(&f);
219             CoeffTracker<T,NDIM> gg(&g);
220 
221             typedef add_op coeff_opT;
222             coeff_opT coeff_op(ff,gg,alpha,beta);
223             typedef insert_op<T,NDIM> apply_opT;
224             apply_opT apply_op(this);
225 
226             woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>,
227                       coeff_op, apply_op, cdata.key0);
228 
229         }
230 
231         this->compressed=false;
232         if (fence) world.gop.fence();
233     }
234 
235     /// Returns true if the function is compressed.
236     template <typename T, std::size_t NDIM>
is_compressed()237     bool FunctionImpl<T,NDIM>::is_compressed() const {
238         return compressed;
239     }
240 
241     /// Returns true if the function is redundant.
242     template <typename T, std::size_t NDIM>
is_redundant()243     bool FunctionImpl<T,NDIM>::is_redundant() const {
244         return redundant;
245     }
246 
247     template <typename T, std::size_t NDIM>
is_nonstandard()248     bool FunctionImpl<T,NDIM>::is_nonstandard() const {return nonstandard;}
249 
250     template <typename T, std::size_t NDIM>
set_functor(const std::shared_ptr<FunctionFunctorInterface<T,NDIM>> functor1)251     void FunctionImpl<T,NDIM>::set_functor(const std::shared_ptr<FunctionFunctorInterface<T,NDIM> > functor1) {
252         this->on_demand=true;
253         functor=functor1;
254     }
255 
256     template <typename T, std::size_t NDIM>
get_functor()257     std::shared_ptr<FunctionFunctorInterface<T,NDIM> > FunctionImpl<T,NDIM>::get_functor() {
258         MADNESS_ASSERT(this->functor);
259         return functor;
260     }
261 
262     template <typename T, std::size_t NDIM>
get_functor()263     std::shared_ptr<FunctionFunctorInterface<T,NDIM> > FunctionImpl<T,NDIM>::get_functor() const {
264         MADNESS_ASSERT(this->functor);
265         return functor;
266     }
267 
268     template <typename T, std::size_t NDIM>
unset_functor()269     void FunctionImpl<T,NDIM>::unset_functor() {
270         this->on_demand=false;
271         functor.reset();
272     }
273 
274     template <typename T, std::size_t NDIM>
is_on_demand()275     bool& FunctionImpl<T,NDIM>::is_on_demand() {return on_demand;};
276 
277     template <typename T, std::size_t NDIM>
is_on_demand()278     const bool& FunctionImpl<T,NDIM>::is_on_demand() const {return on_demand;};
279 
280     template <typename T, std::size_t NDIM>
get_tensor_type()281     TensorType FunctionImpl<T,NDIM>::get_tensor_type() const {return targs.tt;}
282 
283     template <typename T, std::size_t NDIM>
get_tensor_args()284     TensorArgs FunctionImpl<T,NDIM>::get_tensor_args() const {return targs;}
285 
286     template <typename T, std::size_t NDIM>
get_thresh()287     double FunctionImpl<T,NDIM>::get_thresh() const {return thresh;}
288 
289     template <typename T, std::size_t NDIM>
set_thresh(double value)290     void FunctionImpl<T,NDIM>::set_thresh(double value) {thresh = value;}
291 
292     template <typename T, std::size_t NDIM>
get_autorefine()293     bool FunctionImpl<T,NDIM>::get_autorefine() const {return autorefine;}
294 
295     template <typename T, std::size_t NDIM>
set_autorefine(bool value)296     void FunctionImpl<T,NDIM>::set_autorefine(bool value) {autorefine = value;}
297 
298     template <typename T, std::size_t NDIM>
get_k()299     int FunctionImpl<T,NDIM>::get_k() const {return k;}
300 
301     template <typename T, std::size_t NDIM>
get_coeffs()302     const typename FunctionImpl<T,NDIM>::dcT& FunctionImpl<T,NDIM>::get_coeffs() const {return coeffs;}
303 
304     template <typename T, std::size_t NDIM>
get_coeffs()305     typename FunctionImpl<T,NDIM>::dcT& FunctionImpl<T,NDIM>::get_coeffs() {return coeffs;}
306 
307     template <typename T, std::size_t NDIM>
get_cdata()308     const FunctionCommonData<T,NDIM>& FunctionImpl<T,NDIM>::get_cdata() const {return cdata;}
309 
310     template <typename T, std::size_t NDIM>
accumulate_timer(const double time)311     void FunctionImpl<T,NDIM>::accumulate_timer(const double time) const {
312         timer_accumulate.accumulate(time);
313     }
314 
315     template <typename T, std::size_t NDIM>
print_timer()316     void FunctionImpl<T,NDIM>::print_timer() const {
317         if (world.rank()==0) {
318             timer_accumulate.print("accumulate");
319             timer_target_driven.print("target_driven");
320             timer_lr_result.print("result2low_rank");
321         }
322     }
323 
324     template <typename T, std::size_t NDIM>
reset_timer()325     void FunctionImpl<T,NDIM>::reset_timer() {
326         if (world.rank()==0) {
327             timer_accumulate.reset();
328             timer_target_driven.reset();
329             timer_lr_result.reset();
330         }
331     }
332 
333     /// Truncate according to the threshold with optional global fence
334 
335     /// If thresh<=0 the default value of this->thresh is used
336     template <typename T, std::size_t NDIM>
truncate(double tol,bool fence)337     void FunctionImpl<T,NDIM>::truncate(double tol, bool fence) {
338         // Cannot put tol into object since it would make a race condition
339         if (tol <= 0.0)
340             tol = thresh;
341         if (world.rank() == coeffs.owner(cdata.key0)) {
342             if (is_compressed()) {
343                 truncate_spawn(cdata.key0,tol);
344             } else {
345                 truncate_reconstructed_spawn(cdata.key0,tol);
346             }
347         }
348         if (fence)
349             world.gop.fence();
350     }
351 
352     template <typename T, std::size_t NDIM>
key0()353     const typename FunctionImpl<T,NDIM>::keyT& FunctionImpl<T,NDIM>::key0() const {
354         return cdata.key0;
355     }
356 
357     /// Print a plane ("xy", "xz", or "yz") containing the point x to file
358 
359     /// works for all dimensions; we walk through the tree, and if a leaf node
360     /// inside the sub-cell touches the plane we print it in pstricks format
361     template <typename T, std::size_t NDIM>
print_plane(const std::string filename,const int xaxis,const int yaxis,const coordT & el2)362     void FunctionImpl<T,NDIM>::print_plane(const std::string filename, const int xaxis, const int yaxis, const coordT& el2) {
363 
364         // get the local information
365         Tensor<double> localinfo=print_plane_local(xaxis,yaxis,el2);
366 
367         // lump all the local information together, and gather on node0
368         std::vector<Tensor<double> > localinfo_vec(1,localinfo);
369         std::vector<Tensor<double> > printinfo=world.gop.concat0(localinfo_vec);
370         world.gop.fence();
371 
372         // do the actual print
373         if (world.rank()==0) do_print_plane(filename,printinfo,xaxis,yaxis,el2);
374     }
375 
376     /// collect the data for a plot of the MRA structure locally on each node
377 
378     /// @param[in]	xaxis	the x-axis in the plot (can be any axis of the MRA box)
379     /// @param[in]	yaxis	the y-axis in the plot (can be any axis of the MRA box)
380     /// @param[in]	el2
381     template <typename T, std::size_t NDIM>
print_plane_local(const int xaxis,const int yaxis,const coordT & el2)382     Tensor<double> FunctionImpl<T,NDIM>::print_plane_local(const int xaxis, const int yaxis, const coordT& el2) {
383         coordT x_sim;
384         user_to_sim<NDIM>(el2,x_sim);
385         x_sim[2]+=1.e-10;
386 
387         // dimensions are: (# boxes)(hue, x lo left, y lo left, x hi right, y hi right)
388         Tensor<double> plotinfo(coeffs.size(),5);
389         long counter=0;
390 
391         // loop over local boxes, if the fit, add the info to the output tensor
392         typename dcT::const_iterator end = coeffs.end();
393         for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
394             const keyT& key = it->first;
395             const nodeT& node = it->second;
396 
397             // thisKeyContains ignores dim0 and dim1
398             if (key.thisKeyContains(x_sim,xaxis,yaxis) and node.is_leaf() and (node.has_coeff())) {
399 
400                 Level n=key.level();
401                 Vector<Translation,NDIM> l=key.translation();
402                 // get the diametral edges of the node in the plotting plane
403                 double scale=std::pow(0.5,double(n));
404                 double xloleft = scale*l[xaxis];
405                 double yloleft = scale*l[yaxis];
406                 double xhiright = scale*(l[xaxis]+1);
407                 double yhiright = scale*(l[yaxis]+1);
408 
409                 // convert back to user coordinates
410                 Vector<double,4> user;
411                 user[0]=xloleft*FunctionDefaults<NDIM>::get_cell_width()[xaxis] + FunctionDefaults<NDIM>::get_cell()(xaxis,0);
412                 user[2]=xhiright*FunctionDefaults<NDIM>::get_cell_width()[xaxis] + FunctionDefaults<NDIM>::get_cell()(xaxis,0);
413                 user[1]=yloleft*FunctionDefaults<NDIM>::get_cell_width()[yaxis] + FunctionDefaults<NDIM>::get_cell()(yaxis,0);
414                 user[3]=yhiright*FunctionDefaults<NDIM>::get_cell_width()[yaxis] + FunctionDefaults<NDIM>::get_cell()(yaxis,0);
415 
416 
417                 //                    if ((xloleft<-5.0) or (yloleft<-5.0) or (xhiright>5.0) or (yhiright>5.0)) continue;
418                 if ((user[0]<-5.0) or (user[1]<-5.0) or (user[2]>5.0) or (user[3]>5.0)) continue;
419 
420                 // do rank or do error
421                 double color=0.0;
422                 if (1) {
423 
424                     const double maxrank=40;
425                     do_convert_to_color hue(maxrank,false);
426                     color=hue(node.coeff().rank());
427                 } else {
428 
429                     // Make quadrature rule of higher order
430                     const int npt = cdata.npt + 1;
431                     Tensor<double> qx, qw, quad_phi, quad_phiw, quad_phit;
432                     FunctionCommonData<T,NDIM>::_init_quadrature(k+1, npt, qx, qw, quad_phi, quad_phiw, quad_phit);
433                     do_err_box< FunctionFunctorInterface<T,NDIM> > op(this, this->get_functor().get(), npt, qx, quad_phit, quad_phiw);
434 
435                     do_convert_to_color hue(1000.0,true);
436                     double error=op(it);
437                     error=sqrt(error);//*pow(2,key.level()*6);
438                     color=hue(error);
439                 }
440 
441                 plotinfo(counter,0)=color;
442                 plotinfo(counter,1)=user[0];
443                 plotinfo(counter,2)=user[1];
444                 plotinfo(counter,3)=user[2];
445                 plotinfo(counter,4)=user[3];
446                 ++counter;
447             }
448         }
449 
450         // shrink the info
451         if (counter==0) plotinfo=Tensor<double>();
452         else plotinfo=plotinfo(Slice(0,counter-1),Slice(_));
453         return plotinfo;
454     }
455 
456     /// print the MRA structure
457     template <typename T, std::size_t NDIM>
do_print_plane(const std::string filename,std::vector<Tensor<double>> plotinfo,const int xaxis,const int yaxis,const coordT el2)458     void FunctionImpl<T,NDIM>::do_print_plane(const std::string filename, std::vector<Tensor<double> > plotinfo,
459                                               const int xaxis, const int yaxis, const coordT el2) {
460 
461         // invoke only on master node
462         MADNESS_ASSERT(world.rank()==0);
463 
464         // prepare file
465         FILE * pFile;
466         pFile = fopen(filename.c_str(), "w");
467         Tensor<double> cell=FunctionDefaults<NDIM>::get_cell();
468 
469 
470         fprintf(pFile,"\\psset{unit=1cm}\n");
471         fprintf(pFile,"\\begin{pspicture}(%4.2f,%4.2f)(%4.2f,%4.2f)\n",
472                 //            		cell(xaxis,0),cell(xaxis,1),cell(yaxis,0),cell(yaxis,1));
473                 -5.0,-5.0,5.0,5.0);
474         fprintf(pFile,"\\pslinewidth=0.1pt\n");
475 
476         for (std::vector<Tensor<double> >::const_iterator it=plotinfo.begin(); it!=plotinfo.end(); ++it) {
477 
478             Tensor<double> localinfo=*it;
479             if (localinfo.has_data()) {
480 
481                 for (long i=0; i<localinfo.dim(0); ++i) {
482 
483                     fprintf(pFile,"\\newhsbcolor{mycolor}{%8.4f 1.0 0.7}\n",localinfo(i,0));
484                     fprintf(pFile,"\\psframe["//linewidth=0.5pt,"
485                             "fillstyle=solid,"
486                             "fillcolor=mycolor]"
487                             "(%12.8f,%12.8f)(%12.8f,%12.8f)\n",
488                             localinfo(i,1),localinfo(i,2),localinfo(i,3),localinfo(i,4));
489                 }
490             }
491         }
492 
493 
494         fprintf(pFile,"\\end{pspicture}\n");
495         fclose(pFile);
496     }
497 
498     /// print the grid (the roots of the quadrature of each leaf box)
499     /// of this function in user xyz coordinates
500     template <typename T, std::size_t NDIM>
print_grid(const std::string filename)501     void  FunctionImpl<T,NDIM>::print_grid(const std::string filename) const {
502 
503         // get the local information
504         std::vector<keyT> local_keys=local_leaf_keys();
505 
506         // lump all the local information together, and gather on node0
507         std::vector<keyT> all_keys=world.gop.concat0(local_keys);
508         world.gop.fence();
509 
510         // do the actual print
511         if (world.rank()==0) do_print_grid(filename,all_keys);
512 
513     }
514 
515     /// return the keys of the local leaf boxes
516     template <typename T, std::size_t NDIM>
local_leaf_keys()517     std::vector<typename FunctionImpl<T,NDIM>::keyT>  FunctionImpl<T,NDIM>::local_leaf_keys() const {
518 
519         // coeffs.size is maximum number of keys (includes internal keys)
520         std::vector<keyT> keys(coeffs.size());
521 
522         // loop over local boxes, if they are leaf boxes add their quadrature roots
523         // to the output tensor
524         int i=0;
525         typename dcT::const_iterator end = coeffs.end();
526         for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
527             const keyT& key = it->first;
528             const nodeT& node = it->second;
529             if (node.is_leaf()) keys[i++]=key;
530         }
531 
532         // shrink the vector to number of leaf keys
533         keys.resize(i);
534         return keys;
535     }
536 
537     /// print the grid in xyz format
538 
539     /// the quadrature points and the key information will be written to file,
540     /// @param[in]	filename	where the quadrature points will be written to
541     /// @param[in]	keys		all leaf keys
542     template <typename T, std::size_t NDIM>
do_print_grid(const std::string filename,const std::vector<keyT> & keys)543     void  FunctionImpl<T,NDIM>::do_print_grid(const std::string filename, const std::vector<keyT>& keys) const {
544         // invoke only on master node
545         MADNESS_ASSERT(world.rank()==0);
546 
547         // the quadrature points in simulation coordinates of the root node
548         const Tensor<double> qx=cdata.quad_x;
549         const size_t npt = qx.dim(0);
550 
551         // the number of coordinates (grid point tuples) per box ({x1},{x2},{x3},..,{xNDIM})
552         long npoints=power<NDIM>(npt);
553         // the number of boxes
554         long nboxes=keys.size();
555 
556         // prepare file
557         FILE * pFile;
558         pFile = fopen(filename.c_str(), "w");
559 
560         fprintf(pFile,"%ld\n",npoints*nboxes);
561         fprintf(pFile,"%ld points per box and %ld boxes \n",npoints,nboxes);
562 
563         // loop over all leaf boxes
564         typename std::vector<keyT>::const_iterator key_it=keys.begin();
565         for (key_it=keys.begin(); key_it!=keys.end(); ++key_it) {
566 
567             const keyT& key=*key_it;
568             fprintf(pFile,"# key: %8d",key.level());
569             for (size_t d=0; d<NDIM; d++) fprintf(pFile,"%8d",int(key.translation()[d]));
570             fprintf(pFile,"\n");
571 
572             // this is borrowed from fcube
573             const Vector<Translation,NDIM>& l = key.translation();
574             const Level n = key.level();
575             const double h = std::pow(0.5,double(n));
576             coordT c; // will hold the point in user coordinates
577 
578             const Tensor<double>& cell_width = FunctionDefaults<NDIM>::get_cell_width();
579             const Tensor<double>& cell = FunctionDefaults<NDIM>::get_cell();
580 
581             if (NDIM == 3) {
582                 for (size_t i=0; i<npt; ++i) {
583                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
584                     for (size_t j=0; j<npt; ++j) {
585                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
586                         for (size_t k=0; k<npt; ++k) {
587                             c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
588                             // grid weights
589                             //					            double scale = pow(0.5,0.5*NDIM*key.level())*
590                             //					            		sqrt(FunctionDefaults<NDIM>::get_cell_volume());
591                             //					            double w=cdata.quad_phiw[i]*cdata.quad_phiw[j]*cdata.quad_phiw[k];
592 
593                             fprintf(pFile,"%18.12f %18.12f %18.12f\n",c[0],c[1],c[2]);
594                             //								fprintf(pFile,"%18.12e %18.12e %18.12e %18.12e\n",c[0],c[1],c[2],w*scale);
595                         }
596                     }
597                 }
598             } else {
599                 MADNESS_EXCEPTION("only NDIM=3 in print_grid",0);
600             }
601         }
602         fclose(pFile);
603     }
604 
605 
606     /// Returns the truncation threshold according to truncate_method
607     template <typename T, std::size_t NDIM>
truncate_tol(double tol,const keyT & key)608     double FunctionImpl<T,NDIM>::truncate_tol(double tol, const keyT& key) const {
609 
610         // RJH ... introduced max level here to avoid runaway
611         // refinement due to truncation threshold going down to
612         // intrinsic numerical error
613         const int MAXLEVEL1 = 20; // 0.5**20 ~= 1e-6
614         const int MAXLEVEL2 = 10; // 0.25**10 ~= 1e-6
615 
616         if (truncate_mode == 0) {
617             return tol;
618         }
619         else if (truncate_mode == 1) {
620             double L = FunctionDefaults<NDIM>::get_cell_min_width();
621             return tol*std::min(1.0,pow(0.5,double(std::min(key.level(),MAXLEVEL1)))*L);
622         }
623         else if (truncate_mode == 2) {
624             double L = FunctionDefaults<NDIM>::get_cell_min_width();
625             return tol*std::min(1.0,pow(0.25,double(std::min(key.level(),MAXLEVEL2)))*L*L);
626         }
627         else if (truncate_mode == 3) {
628             // similar to truncate mode 1, but with an additional factor to
629             // account for an increased number of boxes in higher dimensions
630 
631             // here is our handwaving argument: this threshold will give each
632             // FunctionNode an error of less than tol. The total error can
633             // then be as high as sqrt(#nodes) * tol. Therefore in order to
634             // account for higher dimensions: divide tol by about the root of
635             // number of siblings (2^NDIM) that have a large error when we
636             // refine along a deep branch of the tree. FAB
637             //
638             // Nope ... it can easily be as high as #nodes * tol.  The real
639             // fix for this is an end-to-end error analysis of the larger
640             // application and if desired to include this factor into the
641             // threshold selected by the application. RJH
642             const static double fac=1.0/std::pow(2,NDIM*0.5);
643             tol*=fac;
644 
645             double L = FunctionDefaults<NDIM>::get_cell_min_width();
646             return tol*std::min(1.0,pow(0.5,double(std::min(key.level(),MAXLEVEL1)))*L);
647 
648         } else {
649             MADNESS_EXCEPTION("truncate_mode invalid",truncate_mode);
650         }
651     }
652 
653     /// Returns patch referring to coeffs of child in parent box
654     template <typename T, std::size_t NDIM>
child_patch(const keyT & child)655     std::vector<Slice> FunctionImpl<T,NDIM>::child_patch(const keyT& child) const {
656         std::vector<Slice> s(NDIM);
657         const Vector<Translation,NDIM>& l = child.translation();
658         for (std::size_t i=0; i<NDIM; ++i)
659             s[i] = cdata.s[l[i]&1]; // Lowest bit of translation
660         return s;
661     }
662 
663     /// Directly project parent NS coeffs to child NS coeffs
664 
665     template <typename T, std::size_t NDIM>
parent_to_child_NS(const keyT & child,const keyT & parent,const coeffT & coeff)666     typename FunctionImpl<T,NDIM>::coeffT FunctionImpl<T,NDIM>::parent_to_child_NS(
667             const keyT& child, const keyT& parent, const coeffT& coeff) const {
668 
669         const implT* f=this;
670         //        	MADNESS_ASSERT(coeff.tensor_type()==TT_FULL);
671         coeffT result;
672 
673         // if the node for child is existent in f, and it is an internal node, we
674         // automatically have the NS form; if it is a leaf node, we only have the
675         // sum coeffs, so we take zero difference coeffs
676         if (child==parent) {
677             if (coeff.dim(0)==2*f->get_k()) result=coeff;		// internal node
678             else if (coeff.dim(0)==f->get_k()) {			// leaf node
679                 tensorT t(f->cdata.v2k);
680                 t(f->cdata.s0)=coeff.full_tensor_copy();
681                 result=coeffT(t,f->get_tensor_args());
682             } else {
683                 MADNESS_EXCEPTION("confused k in parent_to_child_NS",1);
684             }
685         } else if (child.level()>parent.level()) {
686 
687             // parent and coeff should refer to a leaf node with sum coeffs only
688             // b/c tree should be compressed with leaves kept.
689             MADNESS_ASSERT(coeff.dim(0)==f->get_k());
690             const coeffT coeff1=f->parent_to_child(coeff,parent,child);
691             tensorT t(f->cdata.v2k);
692             t(f->cdata.s0)=coeff1.full_tensor_copy();
693             result=coeffT(t,f->get_tensor_args());
694         } else {
695             MADNESS_EXCEPTION("confused keys in parent_to_child_NS",1);
696         }
697         return result;
698     }
699 
700     /// Get the scaling function coeffs at level n starting from NS form
701     // N=2^n, M=N/q, q must be power of 2
702     // q=0 return coeffs [N,k] for direct sum
703     // q>0 return coeffs [k,q,M] for fft sum
704     template <typename T, std::size_t NDIM>
coeffs_for_jun(Level n,long q)705     typename FunctionImpl<T,NDIM>::tensorT FunctionImpl<T,NDIM>::coeffs_for_jun(Level n, long q) {
706         MADNESS_ASSERT(compressed && nonstandard && NDIM<=3);
707         tensorT r,r0;
708         long N=1<<n;
709         long M = (q ? N/q: N);
710         if (q==0) {
711             q = 1;
712             long dim[2*NDIM];
713             for (std::size_t d=0; d<NDIM; ++d) {
714                 dim[d ] = N;
715                 dim[d+NDIM] = cdata.k;
716             }
717             tensorT rr(2*NDIM,dim);
718             r0=r=rr;
719             //NNkk->MqMqkk, since fuse is not allowed. Now needs to move back to 2*NDIM, since tensor max dim is 6
720             //for (int d=NDIM-1; d>=0; --d) r.splitdim_inplace_base(d,M,q);
721         } else {
722             long dim[2*NDIM];
723             for (std::size_t d=0; d<NDIM; ++d) {
724                 //dim[d+NDIM*2] = M;
725                 dim[d+NDIM ] = N;
726                 dim[d ] = cdata.k;
727             }
728             tensorT rr(2*NDIM,dim);
729             r0=rr;
730             /*vector<long> map(3*NDIM);
731               for (int d=0; d<NDIM; ++d) {
732               map[d]=d+2*NDIM;
733               map[NDIM+d]=2*d+1;
734               map[2*NDIM+d]=2*d;
735               }
736               r.mapdim_inplace_base(map);
737               //print(rr);
738               //for (int d=1; d<NDIM; ++d) rr.swapdim_inplace_base(2*NDIM+d,NDIM+d); //kkqqMM->kkqMqM
739               //print(rr);
740               //for (int d=0; d<NDIM; ++d) rr.swapdim_inplace_base(NDIM+2*d,NDIM+2*d-1); //kkqMqM->kkMqMq
741               //print(rr);
742               //for (int d=0; d<NDIM; ++d) rr.fusedim_inplace_base(NDIM+d); //->kkNN
743               //seems that this fuse is not allowed :(
744 
745               //print(rr);
746               */
747             r=rr.cycledim(NDIM,0,-1); //->NNkk or MqMqkk
748         }
749         print("faking done M q r(fake) r0(real)",M,q,"\n", std::vector<long> (r.dims(),r.dims()+6),std::vector<long> (r0.dims(),r0.dims()+6));
750         ProcessID me = world.rank();
751         Vector<long,NDIM> t(N);
752 
753         Vector<long,NDIM> powq, powN, powM;
754         long NDIM1 = NDIM-1;
755         powM[NDIM1]=powq[NDIM1]=powN[NDIM1]=1;
756         for (int d=NDIM1-1; d>=0; --d) {
757             powM[d] = powM[d+1]*M;
758             powq[d] = powq[d+1]*q;
759             powN[d] = powN[d+1]*N;
760         }
761         long powMNDIM = powM[0]*M;
762 
763         for (IndexIterator it(t); it; ++it) {
764             keyT key(n, Vector<Translation,NDIM>(*it));
765             if (coeffs.owner(key) == me) {
766                 typename dcT::iterator it = coeffs.find(key).get();
767                 coeffT qq;
768 
769                 if (it == coeffs.end()) {
770                     // must get from above
771                     typedef std::pair< keyT,coeffT > pairT;
772                     Future<pairT> result;
773                     sock_it_to_me(key, result.remote_ref(world));
774                     const keyT& parent = result.get().first;
775                     //                        const tensorT& t = result.get().second.full_tensor_copy();
776                     const coeffT& t = result.get().second;
777 
778                     qq = (parent_to_child(t, parent, key));
779                 } else {
780                     qq = copy(it->second.coeff());
781                 }
782                 std::vector<Slice> s(NDIM*2);
783                 long ll = 0;
784                 for (std::size_t d=0; d<NDIM; ++d) {
785                     Translation l = key.translation()[d];
786                     long dum = long(float(l)/q);
787                     ll += (l - dum*q)*powMNDIM*powq[d] + dum*powM[d];
788                     //ll += (l % q)*powM[NDIM]*pow((double)q,NDIM-d-1) + (l/q)*pow((double)M,NDIM-d-1);
789 
790                     //print("translation",l);
791                     //s[d       ] = Slice(l,l,0);
792                     //s[d+NDIM  ] = Slice(l%q,l%q,0);
793                     //s[d+NDIM] = Slice(0,k-1,1);
794                 }
795                 //long dum = ll;
796                 for (std::size_t d=0; d<NDIM; ++d) {
797                     Translation l = Translation(float(ll) / powN[d]);
798                     //Translation l = ll / pow((double)N,NDIM-d-1);
799                     s[d ] = Slice(l,l,0);
800                     s[d+NDIM] = Slice(0,k-1,1);
801                     ll = ll - l*powN[d];
802                     //ll = ll % long(pow((double)N,NDIM-d-1));
803                 }
804                 //print(s, dum, key.translation());
805                 coeffT qqq=qq(cdata.s0);
806                 r(s) = qqq.full_tensor_copy();
807 
808             }
809         }
810 
811         world.gop.fence();
812         world.gop.sum(r0);
813         //print(r,r0);
814 
815         return r0;
816     }
817 
818     /// truncate tree at a certain level
819     template <typename T, std::size_t NDIM>
erase(const Level & max_level)820     void FunctionImpl<T,NDIM>::erase(const Level& max_level) {
821         this->make_redundant(true);
822 
823         typename dcT::iterator end = coeffs.end();
824         for (typename dcT::iterator it= coeffs.begin(); it!=end; ++it) {
825             keyT key=it->first;
826             nodeT& node=it->second;
827             if (key.level()>max_level) coeffs.erase(key);
828             if (key.level()==max_level) node.set_has_children(false);
829         }
830         this->undo_redundant(true);
831     }
832 
833 
834     /// Returns some asymmetry measure ... no comms
835     template <typename T, std::size_t NDIM>
check_symmetry_local()836     double FunctionImpl<T,NDIM>::check_symmetry_local() const {
837         PROFILE_MEMBER_FUNC(FunctionImpl);
838         typedef Range<typename dcT::const_iterator> rangeT;
839         return world.taskq.reduce<double,rangeT,do_check_symmetry_local>(rangeT(coeffs.begin(),coeffs.end()),
840                                                                          do_check_symmetry_local(*this));
841     }
842 
843 
844     /// Refine multiple functions down to the same finest level
845 
846     /// @param[v] is the vector of functions we are refining.
847     /// @param[key] is the current node.
848     /// @param[c] is the vector of coefficients passed from above.
849     template <typename T, std::size_t NDIM>
refine_to_common_level(const std::vector<FunctionImpl<T,NDIM> * > & v,const std::vector<tensorT> & c,const keyT key)850     void FunctionImpl<T,NDIM>::refine_to_common_level(const std::vector<FunctionImpl<T,NDIM>*>& v,
851                                                       const std::vector<tensorT>& c,
852                                                       const keyT key) {
853         if (key == cdata.key0 && coeffs.owner(key)!=world.rank()) return;
854 
855         // First insert coefficients from above ... also get write accessors here
856         std::unique_ptr<typename dcT::accessor[]> acc(new typename dcT::accessor[v.size()]);
857         for (unsigned int i=0; i<c.size(); i++) {
858             MADNESS_ASSERT(v[i]->coeffs.get_pmap() == coeffs.get_pmap());
859             MADNESS_ASSERT(v[i]->coeffs.owner(key) == world.rank());
860             bool exists = ! v[i]->coeffs.insert(acc[i],key);
861             if (c[i].size()) {
862                 MADNESS_ASSERT(!exists);
863                 acc[i]->second = nodeT(coeffT(c[i],targs),false);
864             }
865             else {
866                 MADNESS_ASSERT(exists);
867             }
868         }
869 
870         // If everyone has coefficients we are done
871         bool done = true;
872         for (unsigned int i=0; i<v.size(); i++) {
873             done &= acc[i]->second.has_coeff();
874         }
875 
876         if (!done) {
877             // Those functions with coefficients need to be refined down
878             std::vector<tensorT> d(v.size());
879             for (unsigned int i=0; i<v.size(); i++) {
880                 if (acc[i]->second.has_coeff()) {
881                     tensorT s(cdata.v2k);
882                     //                        s(cdata.s0) = acc[i]->second.coeff()(___);
883                     s(cdata.s0) = acc[i]->second.coeff().full_tensor_copy();
884                     acc[i]->second.clear_coeff();
885                     d[i] = unfilter(s);
886                     acc[i]->second.set_has_children(true);
887                 }
888             }
889 
890             // Loop thru children and pass down
891             for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
892                 const keyT& child = kit.key();
893                 std::vector<Slice> cp = child_patch(child);
894                 std::vector<tensorT> childc(v.size());
895                 for (unsigned int i=0; i<v.size(); i++) {
896                     if (d[i].size()) childc[i] = copy(d[i](cp));
897                 }
898                 woT::task(coeffs.owner(child), &implT::refine_to_common_level, v, childc, child);
899             }
900         }
901     }
902 
903     // horrifically non-scalable
904     template <typename T, std::size_t NDIM>
put_in_box(ProcessID from,long nl,long ni)905     void FunctionImpl<T,NDIM>::put_in_box(ProcessID from, long nl, long ni) const {
906         if (world.size()> 1000)
907             throw "NO!";
908         box_leaf[from] = nl;
909         box_interior[from] = ni;
910     }
911 
912     /// Prints summary of data distribution
913     template <typename T, std::size_t NDIM>
print_info()914     void FunctionImpl<T,NDIM>::print_info() const {
915         if (world.size() >= 1000)
916             return;
917         for (int i=0; i<world.size(); ++i)
918             box_leaf[i] = box_interior[i] == 0;
919         world.gop.fence();
920         long nleaf=0, ninterior=0;
921         typename dcT::const_iterator end = coeffs.end();
922         for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
923             const nodeT& node = it->second;
924             if (node.is_leaf())
925                 ++nleaf;
926             else
927                 ++ninterior;
928         }
929         this->send(0, &implT::put_in_box, world.rank(), nleaf, ninterior);
930         world.gop.fence();
931         if (world.rank() == 0) {
932             for (int i=0; i<world.size(); ++i) {
933                 printf("load: %5d %8ld %8ld\n", i, box_leaf[i], box_interior[i]);
934             }
935         }
936         world.gop.fence();
937     }
938 
939     template <typename T, std::size_t NDIM>
noautorefine(const keyT & key,const tensorT & t)940     bool FunctionImpl<T,NDIM>::noautorefine(const keyT& key, const tensorT& t) const {
941         return false;
942     }
943 
944     /// Returns true if this block of coeffs needs autorefining
945     template <typename T, std::size_t NDIM>
autorefine_square_test(const keyT & key,const nodeT & t)946     bool FunctionImpl<T,NDIM>::autorefine_square_test(const keyT& key, const nodeT& t) const {
947         double lo, hi;
948         tnorm(t.coeff().full_tensor_copy(), &lo, &hi);
949         double test = 2*lo*hi + hi*hi;
950         //print("autoreftest",key,thresh,truncate_tol(thresh, key),lo,hi,test);
951         return test> truncate_tol(thresh, key);
952     }
953 
954 
955     /// is this the same as trickle_down() ?
956     template <typename T, std::size_t NDIM>
sum_down_spawn(const keyT & key,const coeffT & s)957     void FunctionImpl<T,NDIM>::sum_down_spawn(const keyT& key, const coeffT& s) {
958         typename dcT::accessor acc;
959         coeffs.insert(acc,key);
960         nodeT& node = acc->second;
961         coeffT& c = node.coeff();
962 
963         //print(key,"received",s.normf(),c.normf(),node.has_children());
964 
965         if (s.size() > 0) {
966             if (c.size() > 0)
967                 c.gaxpy(1.0,s,1.0);
968             else
969                 c = s;
970         }
971 
972         if (node.has_children()) {
973             coeffT d;
974             if (c.has_data()) {
975                 d = coeffT(cdata.v2k,targs);
976                 d(cdata.s0) += c;
977                 d = unfilter(d);
978                 node.clear_coeff();
979             }
980             for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
981                 coeffT ss;
982                 const keyT& child = kit.key();
983                 if (d.size() > 0) ss = copy(d(child_patch(child)));
984                 //print(key,"sending",ss.normf(),"to",child);
985                 woT::task(coeffs.owner(child), &implT::sum_down_spawn, child, ss);
986             }
987         }
988         else {
989             // Missing coeffs assumed to be zero
990             if (c.size() <= 0) c = coeffT(cdata.vk,targs);
991         }
992     }
993 
994     /// After 1d push operator must sum coeffs down the tree to restore correct scaling function coefficients
995     template <typename T, std::size_t NDIM>
sum_down(bool fence)996     void FunctionImpl<T,NDIM>::sum_down(bool fence) {
997         if (world.rank() == coeffs.owner(cdata.key0)) sum_down_spawn(cdata.key0, coeffT());
998 
999         if (fence) world.gop.fence();
1000     }
1001 
1002 
1003     template <typename T, std::size_t NDIM>
forward_do_diff1(const DerivativeBase<T,NDIM> * D,const implT * f,const keyT & key,const std::pair<keyT,coeffT> & left,const std::pair<keyT,coeffT> & center,const std::pair<keyT,coeffT> & right)1004     void FunctionImpl<T,NDIM>::forward_do_diff1(const DerivativeBase<T,NDIM>* D,
1005                                                 const implT* f,
1006                                                 const keyT& key,
1007                                                 const std::pair<keyT,coeffT>& left,
1008                                                 const std::pair<keyT,coeffT>& center,
1009                                                 const std::pair<keyT,coeffT>& right) {
1010         D->forward_do_diff1(f,this,key,left,center,right);
1011     }
1012 
1013 
1014     template <typename T, std::size_t NDIM>
do_diff1(const DerivativeBase<T,NDIM> * D,const implT * f,const keyT & key,const std::pair<keyT,coeffT> & left,const std::pair<keyT,coeffT> & center,const std::pair<keyT,coeffT> & right)1015     void FunctionImpl<T,NDIM>::do_diff1(const DerivativeBase<T,NDIM>* D,
1016                                         const implT* f,
1017                                         const keyT& key,
1018                                         const std::pair<keyT,coeffT>& left,
1019                                         const std::pair<keyT,coeffT>& center,
1020                                         const std::pair<keyT,coeffT>& right) {
1021         D->do_diff1(f,this,key,left,center,right);
1022     }
1023 
1024 
1025     // Called by result function to differentiate f
1026     template <typename T, std::size_t NDIM>
diff(const DerivativeBase<T,NDIM> * D,const implT * f,bool fence)1027     void FunctionImpl<T,NDIM>::diff(const DerivativeBase<T,NDIM>* D, const implT* f, bool fence) {
1028         typedef std::pair<keyT,coeffT> argT;
1029         typename dcT::const_iterator end = f->coeffs.end();
1030         for (typename dcT::const_iterator it=f->coeffs.begin(); it!=end; ++it) {
1031             const keyT& key = it->first;
1032             const nodeT& node = it->second;
1033             if (node.has_coeff()) {
1034                 Future<argT> left  = D->find_neighbor(f, key,-1);
1035                 argT center(key,node.coeff());
1036                 Future<argT> right = D->find_neighbor(f, key, 1);
1037                 world.taskq.add(*this, &implT::do_diff1, D, f, key, left, center, right, TaskAttributes::hipri());
1038             }
1039             else {
1040                 coeffs.replace(key,nodeT(coeffT(),true)); // Empty internal node
1041             }
1042         }
1043         if (fence) world.gop.fence();
1044     }
1045 
1046 
1047     /// return the a std::pair<key, node>, which MUST exist
1048     template <typename T, std::size_t NDIM>
find_datum(keyT key)1049     std::pair<Key<NDIM>,ShallowNode<T,NDIM> > FunctionImpl<T,NDIM>::find_datum(keyT key) const {
1050         MADNESS_ASSERT(coeffs.probe(key));
1051         ShallowNode<T,NDIM> snode(coeffs.find(key).get()->second);
1052         return std::pair<Key<NDIM>,ShallowNode<T,NDIM> >(key,snode);
1053     }
1054 
1055     /// multiply the ket with a one-electron potential rr(1,2)= f(1,2)*g(1)
1056 
1057     /// @param[in]	val_ket	function values of f(1,2)
1058     /// @param[in]	val_pot	function values of g(1)
1059     /// @param[in]	particle	if 0 then g(1), if 1 then g(2)
1060     /// @return		the resulting function values
1061     template <typename T, std::size_t NDIM>
multiply(const coeffT & val_ket,const coeffT & val_pot,int particle)1062     typename FunctionImpl<T,NDIM>::coeffT FunctionImpl<T,NDIM>::multiply(const coeffT& val_ket,
1063             const coeffT& val_pot, int particle) const {
1064 
1065         MADNESS_ASSERT(particle==0 or particle==1);
1066         MADNESS_ASSERT(val_pot.tensor_type()==TT_FULL);
1067         MADNESS_ASSERT(val_ket.tensor_type()==TT_2D);
1068 
1069         std::vector<long> vkhalf=std::vector<long>(NDIM/2,cdata.vk[0]);
1070         tensorT ones=tensorT(vkhalf);
1071         ones=1.0;
1072 
1073         TensorArgs targs(-1.0,val_ket.tensor_type());
1074         coeffT pot12;
1075         if (particle==0) pot12=outer(val_pot.full_tensor(),ones,targs);
1076         else if (particle==1) pot12=outer(ones,val_pot.full_tensor(),targs);
1077 
1078         coeffT result=copy(val_ket);
1079         result.emul(pot12);
1080 
1081         return result;
1082     }
1083 
1084 
1085     /// given several coefficient tensors, assemble a result tensor
1086 
1087     /// the result looks like: 	(v(1,2) + v(1) + v(2)) |ket(1,2)>
1088     /// or 						(v(1,2) + v(1) + v(2)) |p(1) p(2)>
1089     /// i.e. coefficients for the ket and coefficients for the two particles are
1090     /// mutually exclusive. All potential terms are optional, just pass in empty coeffs.
1091     /// @param[in]	key			the key of the FunctionNode to which these coeffs belong
1092     /// @param[in]	cket		coefficients of the ket
1093     /// @param[in]	vpotential1	function values of the potential for particle 1
1094     /// @param[in]	vpotential2	function values of the potential for particle 2
1095     /// @param[in]	veri		function values for the 2-particle potential
1096     template <typename T, std::size_t NDIM>
assemble_coefficients(const keyT & key,const coeffT & coeff_ket,const coeffT & vpotential1,const coeffT & vpotential2,const tensorT & veri)1097     typename FunctionImpl<T,NDIM>::coeffT FunctionImpl<T,NDIM>::assemble_coefficients(
1098             const keyT& key, const coeffT& coeff_ket, const coeffT& vpotential1,
1099             const coeffT& vpotential2, const tensorT& veri) const {
1100 
1101         // take a shortcut if we are already done
1102         bool ket_only=(not (vpotential1.has_data() or vpotential2.has_data() or veri.has_data()));
1103         if (ket_only) return coeff_ket;
1104 
1105         // switch to values instead of coefficients
1106         coeffT val_ket=coeffs2values(key,coeff_ket);
1107 
1108         // the result tensor
1109         coeffT val_result;
1110         coeffT coeff_result;
1111 
1112         // potential for particles 1 and 2, must be done in TT_2D
1113         if (vpotential1.has_data() or vpotential2.has_data()) {
1114             val_ket=val_ket.convert(TensorArgs(-1.0,TT_2D));
1115         }
1116         if (vpotential1.has_data()) val_result+=multiply(val_ket,vpotential1,0);
1117         if (vpotential2.has_data()) val_result+=multiply(val_ket,vpotential2,1);
1118 
1119         // values for eri: this must be done in full rank...
1120         if (veri.has_data()) {
1121             tensorT val_ket2=val_ket.full_tensor_copy().emul(veri);
1122             if (val_result.has_data()) val_ket2+=val_result.full_tensor_copy();
1123             // values2coeffs expensive (30%), coeffT() (relatively) cheap (8%)
1124             coeff_result=coeffT(values2coeffs(key,val_ket2),this->get_tensor_args());
1125 
1126         } else {
1127 
1128             // convert back to original tensor type
1129             val_ket=val_ket.convert(get_tensor_args());
1130             MADNESS_ASSERT(val_result.has_data());
1131             coeff_result=values2coeffs(key,val_result);
1132             coeff_result.reduce_rank(this->get_tensor_args().thresh);
1133         }
1134 
1135         return coeff_result;
1136 
1137     }
1138 
1139     /// Permute the dimensions of f according to map, result on this
1140     template <typename T, std::size_t NDIM>
mapdim(const implT & f,const std::vector<long> & map,bool fence)1141     void FunctionImpl<T,NDIM>::mapdim(const implT& f, const std::vector<long>& map, bool fence) {
1142 
1143         PROFILE_MEMBER_FUNC(FunctionImpl);
1144         const_cast<implT*>(&f)->flo_unary_op_node_inplace(do_mapdim(map,*this),fence);
1145 
1146     }
1147 
1148 
1149     /// take the average of two functions, similar to: this=0.5*(this+rhs)
1150 
1151     /// works in either basis and also in nonstandard form
1152     template <typename T, std::size_t NDIM>
average(const implT & rhs)1153     void FunctionImpl<T,NDIM>::average(const implT& rhs) {
1154 
1155         rhs.flo_unary_op_node_inplace(do_average(*this),true);
1156         this->scale_inplace(0.5,true);
1157         flo_unary_op_node_inplace(do_reduce_rank(targs),true);
1158     }
1159 
1160     /// change the tensor type of the coefficients in the FunctionNode
1161 
1162     /// @param[in]  targs   target tensor arguments (threshold and full/low rank)
1163     template <typename T, std::size_t NDIM>
change_tensor_type1(const TensorArgs & targs,bool fence)1164     void FunctionImpl<T,NDIM>::change_tensor_type1(const TensorArgs& targs, bool fence) {
1165         flo_unary_op_node_inplace(do_change_tensor_type(targs),fence);
1166     }
1167 
1168     /// reduce the rank of the coefficients tensors
1169 
1170     /// @param[in]  targs   target tensor arguments (threshold and full/low rank)
1171     template <typename T, std::size_t NDIM>
reduce_rank(const TensorArgs & targs,bool fence)1172     void FunctionImpl<T,NDIM>::reduce_rank(const TensorArgs& targs, bool fence) {
1173         flo_unary_op_node_inplace(do_reduce_rank(targs),fence);
1174     }
1175 
1176 
1177     /// Transform sum coefficients at level n to sums+differences at level n-1
1178 
1179     /// Given scaling function coefficients s[n][l][i] and s[n][l+1][i]
1180     /// return the scaling function and wavelet coefficients at the
1181     /// coarser level.  I.e., decompose Vn using Vn = Vn-1 + Wn-1.
1182     /// \code
1183     /// s_i = sum(j) h0_ij*s0_j + h1_ij*s1_j
1184     /// d_i = sum(j) g0_ij*s0_j + g1_ij*s1_j
1185     //  \endcode
1186     /// Returns a new tensor and has no side effects.  Works for any
1187     /// number of dimensions.
1188     ///
1189     /// No communication involved.
1190     template <typename T, std::size_t NDIM>
filter(const tensorT & s)1191     typename FunctionImpl<T,NDIM>::tensorT FunctionImpl<T,NDIM>::filter(const tensorT& s) const {
1192         tensorT r(cdata.v2k,false);
1193         tensorT w(cdata.v2k,false);
1194         return fast_transform(s,cdata.hgT,r,w);
1195         //return transform(s,cdata.hgT);
1196     }
1197 
1198     template <typename T, std::size_t NDIM>
filter(const coeffT & s)1199     typename FunctionImpl<T,NDIM>::coeffT FunctionImpl<T,NDIM>::filter(const coeffT& s) const {
1200         coeffT result=transform(s,cdata.hgT);
1201         return result;
1202     }
1203 
1204     ///  Transform sums+differences at level n to sum coefficients at level n+1
1205 
1206     ///  Given scaling function and wavelet coefficients (s and d)
1207     ///  returns the scaling function coefficients at the next finer
1208     ///  level.  I.e., reconstruct Vn using Vn = Vn-1 + Wn-1.
1209     ///  \code
1210     ///  s0 = sum(j) h0_ji*s_j + g0_ji*d_j
1211     ///  s1 = sum(j) h1_ji*s_j + g1_ji*d_j
1212     ///  \endcode
1213     ///  Returns a new tensor and has no side effects
1214     ///
1215     ///  If (sonly) ... then ss is only the scaling function coeff (and
1216     ///  assume the d are zero).  Works for any number of dimensions.
1217     ///
1218     /// No communication involved.
1219     template <typename T, std::size_t NDIM>
unfilter(const tensorT & s)1220     typename FunctionImpl<T,NDIM>::tensorT FunctionImpl<T,NDIM>::unfilter(const tensorT& s) const {
1221         tensorT r(cdata.v2k,false);
1222         tensorT w(cdata.v2k,false);
1223         return fast_transform(s,cdata.hg,r,w);
1224         //return transform(s, cdata.hg);
1225     }
1226 
1227     template <typename T, std::size_t NDIM>
unfilter(const coeffT & s)1228     typename FunctionImpl<T,NDIM>::coeffT FunctionImpl<T,NDIM>::unfilter(const coeffT& s) const {
1229         return transform(s,cdata.hg);
1230     }
1231 
1232     /// downsample the sum coefficients of level n+1 to sum coeffs on level n
1233 
1234     /// specialization of the filter method, will yield only the sum coefficients
1235     /// @param[in]  key key of level n
1236     /// @param[in]  v   vector of sum coefficients of level n+1
1237     /// @param[in]  args    TensorArguments for possible low rank approximations
1238     /// @return     sum coefficients on level n in full tensor format
1239     template <typename T, std::size_t NDIM>
downsample(const keyT & key,const std::vector<Future<coeffT>> & v)1240     typename FunctionImpl<T,NDIM>::tensorT FunctionImpl<T,NDIM>::downsample(const keyT& key, const std::vector< Future<coeffT > >& v) const {
1241 
1242         tensorT result(cdata.vk);
1243 
1244         // the twoscale coefficients: for downsampling use h0/h1; see Alpert Eq (3.34a)
1245         const tensorT h[2] = {cdata.h0T, cdata.h1T};
1246         tensorT matrices[NDIM];
1247 
1248         // loop over all child nodes, transform and accumulate
1249         long i=0;
1250         for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
1251 
1252             // get the appropriate twoscale coefficients for each dimension
1253             for (size_t ii=0; ii<NDIM; ++ii) matrices[ii]=h[kit.key().translation()[ii]%2];
1254 
1255             // transform and accumulate on the result
1256             result+=general_transform(v[i].get(),matrices).full_tensor_copy();
1257 
1258         }
1259         return result;
1260     }
1261 
1262     /// upsample the sum coefficients of level 1 to sum coeffs on level n+1
1263 
1264     /// specialization of the unfilter method, will transform only the sum coefficients
1265     /// @param[in]  key     key of level n+1
1266     /// @param[in]  coeff   sum coefficients of level n (does NOT belong to key!!)
1267     /// @param[in]  args    TensorArguments for possible low rank approximations
1268     /// @return     sum     coefficients on level n+1
1269     template <typename T, std::size_t NDIM>
upsample(const keyT & key,const coeffT & coeff)1270     typename FunctionImpl<T,NDIM>::coeffT FunctionImpl<T,NDIM>::upsample(const keyT& key, const coeffT& coeff) const {
1271 
1272         // the twoscale coefficients: for upsampling use h0/h1; see Alpert Eq (3.35a/b)
1273         // note there are no difference coefficients; if you want that use unfilter
1274         const tensorT h[2] = {cdata.h0, cdata.h1};
1275         tensorT matrices[NDIM];
1276 
1277         // get the appropriate twoscale coefficients for each dimension
1278         for (size_t ii=0; ii<NDIM; ++ii) matrices[ii]=h[key.translation()[ii]%2];
1279 
1280         // transform and accumulate on the result
1281         const coeffT result=general_transform(coeff,matrices);
1282         return result;
1283     }
1284 
1285 
1286     /// Projects old function into new basis (only in reconstructed form)
1287     template <typename T, std::size_t NDIM>
project(const implT & old,bool fence)1288     void FunctionImpl<T,NDIM>::project(const implT& old, bool fence) {
1289         long kmin = std::min(cdata.k,old.cdata.k);
1290         std::vector<Slice> s(NDIM,Slice(0,kmin-1));
1291         typename dcT::const_iterator end = old.coeffs.end();
1292         for (typename dcT::const_iterator it=old.coeffs.begin(); it!=end; ++it) {
1293             const keyT& key = it->first;
1294             const nodeT& node = it->second;
1295             if (node.has_coeff()) {
1296                 coeffT c(cdata.vk,targs);
1297                 c(s) += node.coeff()(s);
1298                 coeffs.replace(key,nodeT(c,false));
1299             }
1300             else {
1301                 coeffs.replace(key,nodeT(coeffT(),true));
1302             }
1303         }
1304         if (fence)
1305             world.gop.fence();
1306     }
1307 
1308     template <typename T, std::size_t NDIM>
exists_and_has_children(const keyT & key)1309     bool FunctionImpl<T,NDIM>::exists_and_has_children(const keyT& key) const {
1310         return coeffs.probe(key) && coeffs.find(key).get()->second.has_children();
1311     }
1312 
1313     template <typename T, std::size_t NDIM>
exists_and_is_leaf(const keyT & key)1314     bool FunctionImpl<T,NDIM>::exists_and_is_leaf(const keyT& key) const {
1315         return coeffs.probe(key) && (not coeffs.find(key).get()->second.has_children());
1316     }
1317 
1318 
1319     template <typename T, std::size_t NDIM>
broaden_op(const keyT & key,const std::vector<Future<bool>> & v)1320     void FunctionImpl<T,NDIM>::broaden_op(const keyT& key, const std::vector< Future <bool> >& v) {
1321         for (unsigned int i=0; i<v.size(); ++i) {
1322             if (v[i]) {
1323                 refine_op(true_refine_test(), key);
1324                 break;
1325             }
1326         }
1327     }
1328 
1329     // For each local node sets value of norm tree to 0.0
1330     template <typename T, std::size_t NDIM>
zero_norm_tree()1331     void FunctionImpl<T,NDIM>::zero_norm_tree() {
1332         typename dcT::iterator end = coeffs.end();
1333         for (typename dcT::iterator it=coeffs.begin(); it!=end; ++it) {
1334             it->second.set_norm_tree(0.0);
1335         }
1336     }
1337 
1338     // Broaden tree
1339     template <typename T, std::size_t NDIM>
broaden(std::vector<bool> is_periodic,bool fence)1340     void FunctionImpl<T,NDIM>::broaden(std::vector<bool> is_periodic, bool fence) {
1341         typename dcT::iterator end = coeffs.end();
1342         for (typename dcT::iterator it=coeffs.begin(); it!=end; ++it) {
1343             const keyT& key = it->first;
1344             typename dcT::accessor acc;
1345             const auto found = coeffs.find(acc,key);
1346             MADNESS_ASSERT(found);
1347             nodeT& node = acc->second;
1348             if (node.has_coeff() &&
1349                 node.get_norm_tree() != -1.0 &&
1350                 node.coeff().normf() >= truncate_tol(thresh,key)) {
1351 
1352                 node.set_norm_tree(-1.0); // Indicates already broadened or result of broadening/refining
1353 
1354                 //int ndir = std::pow(3,NDIM);
1355                 int ndir = static_cast<int>(std::pow(static_cast<double>(3), static_cast<int>(NDIM)));
1356                 std::vector< Future <bool> > v = future_vector_factory<bool>(ndir);
1357                 keyT neigh;
1358                 int i=0;
1359                 for (HighDimIndexIterator it(NDIM,3); it; ++it) {
1360                     Vector<Translation,NDIM> l(*it);
1361                     for (std::size_t d=0; d<NDIM; ++d) {
1362                         const int odd = key.translation()[d] & 0x1L; // 1 if odd, 0 if even
1363                         l[d] -= 1; // (0,1,2) --> (-1,0,1)
1364                         if (l[d] == -1)
1365                             l[d] = -1-odd;
1366                         else if (l[d] ==  1)
1367                             l[d] = 2 - odd;
1368                     }
1369                     keyT neigh = neighbor(key, keyT(key.level(),l), is_periodic);
1370 
1371                     if (neigh.is_valid()) {
1372                         v[i++] = this->task(coeffs.owner(neigh), &implT::exists_and_has_children, neigh);
1373                     }
1374                     else {
1375                         v[i++].set(false);
1376                     }
1377                 }
1378                 woT::task(world.rank(), &implT::broaden_op, key, v);
1379             }
1380         }
1381         // Reset value of norm tree so that can repeat broadening
1382         if (fence) {
1383             world.gop.fence();
1384             zero_norm_tree();
1385             world.gop.fence();
1386         }
1387     }
1388 
1389     /// sum all the contributions from all scales after applying an operator in mod-NS form
1390     template <typename T, std::size_t NDIM>
trickle_down(bool fence)1391     void FunctionImpl<T,NDIM>::trickle_down(bool fence) {
1392         //            MADNESS_ASSERT(is_redundant());
1393         nonstandard = compressed = redundant = false;
1394         //            this->print_size("in trickle_down");
1395         if (world.rank() == coeffs.owner(cdata.key0))
1396             woT::task(world.rank(), &implT::trickle_down_op, cdata.key0,coeffT());
1397         if (fence)
1398             world.gop.fence();
1399     }
1400 
1401     /// sum all the contributions from all scales after applying an operator in mod-NS form
1402 
1403     /// cf reconstruct_op
1404     template <typename T, std::size_t NDIM>
trickle_down_op(const keyT & key,const coeffT & s)1405     void FunctionImpl<T,NDIM>::trickle_down_op(const keyT& key, const coeffT& s) {
1406         // Note that after application of an integral operator not all
1407         // siblings may be present so it is necessary to check existence
1408         // and if absent insert an empty leaf node.
1409         //
1410         // If summing the result of an integral operator (i.e., from
1411         // non-standard form) there will be significant scaling function
1412         // coefficients at all levels and possibly difference coefficients
1413         // in leaves, hence the tree may refine as a result.
1414         typename dcT::iterator it = coeffs.find(key).get();
1415         if (it == coeffs.end()) {
1416             coeffs.replace(key,nodeT(coeffT(),false));
1417             it = coeffs.find(key).get();
1418         }
1419         nodeT& node = it->second;
1420 
1421         // The integral operator will correctly connect interior nodes
1422         // to children but may leave interior nodes without coefficients
1423         // ... but they still need to sum down so just give them zeros
1424         if (node.coeff().has_no_data()) node.coeff()=coeffT(cdata.vk,targs);
1425 
1426         //            if (node.has_children() || node.has_coeff()) { // Must allow for inconsistent state from transform, etc.
1427         if (node.has_children()) { // Must allow for inconsistent state from transform, etc.
1428             coeffT d = node.coeff();
1429             if (key.level() > 0) d += s; // -- note accumulate for NS summation
1430             node.clear_coeff();
1431             for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
1432                 const keyT& child = kit.key();
1433                 coeffT ss= upsample(child,d);
1434                 ss.reduce_rank(thresh);
1435                 PROFILE_BLOCK(recon_send);
1436                 woT::task(coeffs.owner(child), &implT::trickle_down_op, child, ss);
1437             }
1438         }
1439         else {
1440             node.coeff()+=s;
1441             node.coeff().reduce_rank(thresh);
1442         }
1443     }
1444 
1445     template <typename T, std::size_t NDIM>
reconstruct(bool fence)1446     void FunctionImpl<T,NDIM>::reconstruct(bool fence) {
1447         // Must set true here so that successive calls without fence do the right thing
1448         MADNESS_ASSERT(not is_redundant());
1449         nonstandard = compressed = redundant = false;
1450         if (world.rank() == coeffs.owner(cdata.key0))
1451             woT::task(world.rank(), &implT::reconstruct_op, cdata.key0,coeffT());
1452         if (fence)
1453             world.gop.fence();
1454     }
1455 
1456     /// compress the wave function
1457 
1458     /// after application there will be sum coefficients at the root level,
1459     /// and difference coefficients at all other levels; furthermore:
1460     /// @param[in] nonstandard	keep sum coeffs at all other levels, except leaves
1461     /// @param[in] keepleaves	keep sum coeffs (but no diff coeffs) at leaves
1462     /// @param[in] redundant    keep only sum coeffs at all levels, discard difference coeffs
1463     template <typename T, std::size_t NDIM>
compress(bool nonstandard,bool keepleaves,bool redundant,bool fence)1464     void FunctionImpl<T,NDIM>::compress(bool nonstandard, bool keepleaves, bool redundant, bool fence) {
1465         MADNESS_ASSERT(not is_redundant());
1466         // Must set true here so that successive calls without fence do the right thing
1467         this->compressed = true;
1468         this->nonstandard = nonstandard;
1469         this->redundant = redundant;
1470 
1471         // these two are exclusive
1472         MADNESS_ASSERT(not (redundant and nonstandard));
1473         // otherwise we loose information
1474         if (redundant) {MADNESS_ASSERT(keepleaves);}
1475 
1476         //            this->print_tree();
1477         if (world.rank() == coeffs.owner(cdata.key0)) {
1478 
1479             compress_spawn(cdata.key0, nonstandard, keepleaves, redundant);
1480         }
1481         if (fence)
1482             world.gop.fence();
1483     }
1484 
1485     /// convert this to redundant, i.e. have sum coefficients on all levels
1486     template <typename T, std::size_t NDIM>
make_redundant(const bool fence)1487     void FunctionImpl<T,NDIM>::make_redundant(const bool fence) {
1488 
1489         // fast return if possible
1490         if (is_redundant()) return;
1491 
1492         // NS form might have leaf sum coeffs, but we don't know
1493         // change to standard compressed form
1494         if (is_nonstandard()) this->standard(true);
1495 
1496         // we need the leaf sum coeffs, so reconstruct
1497         if (is_compressed()) reconstruct(true);
1498 
1499         compress(false,true,true,fence);
1500         compressed=false;
1501 
1502     }
1503 
1504     /// convert this from redundant to standard reconstructed form
1505     template <typename T, std::size_t NDIM>
undo_redundant(const bool fence)1506     void FunctionImpl<T,NDIM>::undo_redundant(const bool fence) {
1507 
1508         if (!is_redundant()) return;
1509         redundant = compressed = nonstandard = false;
1510         flo_unary_op_node_inplace(remove_internal_coeffs(),fence);
1511     }
1512 
1513 
1514     /// compute for each FunctionNode the norm of the function inside that node
1515     template <typename T, std::size_t NDIM>
norm_tree(bool fence)1516     void FunctionImpl<T,NDIM>::norm_tree(bool fence) {
1517         if (world.rank() == coeffs.owner(cdata.key0))
1518             norm_tree_spawn(cdata.key0);
1519         if (fence)
1520             world.gop.fence();
1521     }
1522 
1523     template <typename T, std::size_t NDIM>
norm_tree_op(const keyT & key,const std::vector<Future<double>> & v)1524     double FunctionImpl<T,NDIM>::norm_tree_op(const keyT& key, const std::vector< Future<double> >& v) {
1525         //PROFILE_MEMBER_FUNC(FunctionImpl);
1526         double sum = 0.0;
1527         int i=0;
1528         for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
1529             double value = v[i].get();
1530             sum += value*value;
1531         }
1532         sum = sqrt(sum);
1533         coeffs.task(key, &nodeT::set_norm_tree, sum); // why a task? because send is deprecated to keep comm thread free
1534         //if (key.level() == 0) std::cout << "NORM_TREE_TOP " << sum << "\n";
1535         return sum;
1536     }
1537 
1538     template <typename T, std::size_t NDIM>
norm_tree_spawn(const keyT & key)1539     Future<double> FunctionImpl<T,NDIM>::norm_tree_spawn(const keyT& key) {
1540         nodeT& node = coeffs.find(key).get()->second;
1541         if (node.has_children()) {
1542             std::vector< Future<double> > v = future_vector_factory<double>(1<<NDIM);
1543             int i=0;
1544             for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
1545                 v[i] = woT::task(coeffs.owner(kit.key()), &implT::norm_tree_spawn, kit.key());
1546             }
1547             return woT::task(world.rank(),&implT::norm_tree_op, key, v);
1548         }
1549         else {
1550             //                return Future<double>(node.coeff().normf());
1551             const double norm=node.coeff().normf();
1552             // invoked locally anyways
1553             node.set_norm_tree(norm);
1554             return Future<double>(norm);
1555         }
1556     }
1557 
1558     /// truncate using a tree in reconstructed form
1559 
1560     /// must be invoked where key is local
1561     template <typename T, std::size_t NDIM>
truncate_reconstructed_spawn(const keyT & key,const double tol)1562     Future<typename FunctionImpl<T,NDIM>::coeffT> FunctionImpl<T,NDIM>::truncate_reconstructed_spawn(const keyT& key, const double tol) {
1563         MADNESS_ASSERT(coeffs.probe(key));
1564         nodeT& node = coeffs.find(key).get()->second;
1565 
1566         // if this is a leaf node just return the sum coefficients
1567         if (not node.has_children()) return Future<coeffT>(node.coeff());
1568 
1569         // if this is an internal node, wait for all the children's sum coefficients
1570         // and use them to determine if the children can be removed
1571         std::vector<Future<coeffT> > v = future_vector_factory<coeffT>(1<<NDIM);
1572         int i=0;
1573         for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
1574             v[i] = woT::task(coeffs.owner(kit.key()), &implT::truncate_reconstructed_spawn, kit.key(),tol,TaskAttributes::hipri());
1575         }
1576 
1577         // will return (possibly empty) sum coefficients
1578         return woT::task(world.rank(),&implT::truncate_reconstructed_op,key,v,tol,TaskAttributes::hipri());
1579 
1580     }
1581 
1582     /// given the sum coefficients of all children, truncate or not
1583 
1584     /// @return     new sum coefficients (empty if internal, not empty, if new leaf); might delete its children
1585     template <typename T, std::size_t NDIM>
truncate_reconstructed_op(const keyT & key,const std::vector<Future<coeffT>> & v,const double tol)1586     typename FunctionImpl<T,NDIM>::coeffT FunctionImpl<T,NDIM>::truncate_reconstructed_op(const keyT& key, const std::vector< Future<coeffT > >& v, const double tol) {
1587 
1588         MADNESS_ASSERT(coeffs.probe(key));
1589 
1590         // the sum coefficients might be empty, which means they come from an internal node
1591         // and we must not truncate; so just return empty coeffs again
1592         for (size_t i=0; i<v.size(); ++i) if (v[i].get().has_no_data()) return coeffT();
1593 
1594         // do not truncate below level 1
1595         if (key.level()<2) return coeffT();
1596 
1597         // compute the wavelet coefficients from the child nodes
1598         typename dcT::accessor acc;
1599         const auto found = coeffs.find(acc, key);
1600         MADNESS_ASSERT(found);
1601         int i=0;
1602         tensorT d(cdata.v2k);
1603         for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
1604             //                d(child_patch(kit.key())) += v[i].get();
1605             d(child_patch(kit.key())) += v[i].get().full_tensor_copy();
1606         }
1607 
1608         d = filter(d);
1609         tensorT s=copy(d(cdata.s0));
1610         d(cdata.s0) = 0.0;
1611         const double error=d.normf();
1612 
1613         nodeT& node = coeffs.find(key).get()->second;
1614 
1615         if (error < truncate_tol(tol,key)) {
1616             node.set_has_children(false);
1617             for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
1618                 coeffs.erase(kit.key());
1619             }
1620             // "replace" children with new sum coefficients
1621             coeffT ss=coeffT(s,targs);
1622             acc->second.set_coeff(ss);
1623             return ss;
1624         } else {
1625             return coeffT();
1626         }
1627     }
1628 
1629     /// calculate the wavelet coefficients using the sum coefficients of all child nodes
1630 
1631     /// @param[in] key 	this's key
1632     /// @param[in] v 	sum coefficients of the child nodes
1633     /// @param[in] nonstandard  keep the sum coefficients with the wavelet coefficients
1634     /// @param[in] redundant    keep only the sum coefficients, discard the wavelet coefficients
1635     /// @return 		the sum coefficients
1636     template <typename T, std::size_t NDIM>
compress_op(const keyT & key,const std::vector<Future<coeffT>> & v,bool nonstandard,bool redundant)1637     typename FunctionImpl<T,NDIM>::coeffT FunctionImpl<T,NDIM>::compress_op(const keyT& key, const std::vector< Future<coeffT > >& v, bool nonstandard, bool redundant) {
1638         //PROFILE_MEMBER_FUNC(FunctionImpl);
1639 
1640         MADNESS_ASSERT(not redundant);
1641         double cpu0=cpu_time();
1642         // Copy child scaling coeffs into contiguous block
1643         tensorT d(cdata.v2k);
1644         //            coeffT d(cdata.v2k,targs);
1645         int i=0;
1646         for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
1647             //                d(child_patch(kit.key())) += v[i].get();
1648             d(child_patch(kit.key())) += v[i].get().full_tensor_copy();
1649         }
1650 
1651         d = filter(d);
1652         double cpu1=cpu_time();
1653         timer_filter.accumulate(cpu1-cpu0);
1654         cpu0=cpu1;
1655 
1656         typename dcT::accessor acc;
1657         const auto found = coeffs.find(acc, key);
1658         MADNESS_ASSERT(found);
1659 
1660         if (acc->second.has_coeff()) {
1661             print(" stuff in compress_op");
1662             //                const coeffT& c = acc->second.coeff();
1663             const tensorT c = acc->second.coeff().full_tensor_copy();
1664             if (c.dim(0) == k) {
1665                 d(cdata.s0) += c;
1666             }
1667             else {
1668                 d += c;
1669             }
1670         }
1671 
1672         // tighter thresh for internal nodes
1673         TensorArgs targs2=targs;
1674         targs2.thresh*=0.1;
1675 
1676         // need the deep copy for contiguity
1677         coeffT ss=coeffT(copy(d(cdata.s0)),targs2);
1678 
1679         if (key.level()> 0 && !nonstandard)
1680             d(cdata.s0) = 0.0;
1681 
1682         // insert either sum or difference coefficients
1683         if (redundant) {
1684             acc->second.set_coeff(ss);
1685         } else {
1686             coeffT dd=coeffT(d,targs2);
1687             acc->second.set_coeff(dd);
1688         }
1689         cpu1=cpu_time();
1690         timer_compress_svd.accumulate(cpu1-cpu0);
1691 
1692         // return sum coefficients
1693         return ss;
1694     }
1695 
1696     /// similar to compress_op, but insert only the sum coefficients in the tree
1697 
1698     /// @param[in] key  this's key
1699     /// @param[in] v    sum coefficients of the child nodes
1700     /// @return         the sum coefficients
1701     template <typename T, std::size_t NDIM>
make_redundant_op(const keyT & key,const std::vector<Future<coeffT>> & v)1702     typename FunctionImpl<T,NDIM>::coeffT FunctionImpl<T,NDIM>::make_redundant_op(const keyT& key, const std::vector< Future<coeffT > >& v) {
1703 
1704         // get the sum coefficients of this level given the sum coefficients of level n+1
1705         TensorArgs targs2=targs;
1706         targs2.thresh*=0.1;
1707         coeffT s(this->downsample(key,v),targs2);
1708 
1709         // insert sum coefficients into tree
1710         typename dcT::accessor acc;
1711         const auto found = coeffs.find(acc, key);
1712         MADNESS_ASSERT(found);
1713         MADNESS_ASSERT(not (acc->second.has_coeff()));
1714         acc->second.set_coeff(s);
1715 
1716         return s;
1717     }
1718 
1719     /// Changes non-standard compressed form to standard compressed form
1720     template <typename T, std::size_t NDIM>
standard(bool fence)1721     void FunctionImpl<T,NDIM>::standard(bool fence) {
1722 
1723         flo_unary_op_node_inplace(do_standard(this),fence);
1724         nonstandard = false;
1725     }
1726 
1727 
1728     /// after apply we need to do some cleanup;
1729     template <typename T, std::size_t NDIM>
finalize_apply(const bool fence)1730     double FunctionImpl<T,NDIM>::finalize_apply(const bool fence) {
1731         TensorArgs tight_args(targs);
1732         tight_args.thresh*=0.01;
1733         double begin=wall_time();
1734         flo_unary_op_node_inplace(do_consolidate_buffer(tight_args),true);
1735 
1736         // reduce the rank of the final nodes, leave full tensors unchanged
1737         //            flo_unary_op_node_inplace(do_reduce_rank(tight_args.thresh),true);
1738         flo_unary_op_node_inplace(do_reduce_rank(targs),true);
1739 
1740         // change TT_FULL to low rank
1741         flo_unary_op_node_inplace(do_change_tensor_type(targs),true);
1742 
1743         // truncate leaf nodes to avoid excessive tree refinement
1744         flo_unary_op_node_inplace(do_truncate_NS_leafs(this),true);
1745 
1746         double end=wall_time();
1747         double elapsed=end-begin;
1748         this->compressed=true;
1749         this->nonstandard=true;
1750         this->redundant=false;
1751         if (fence) world.gop.fence();
1752         return elapsed;
1753     }
1754 
1755     /// Returns the square of the local norm ... no comms
1756     template <typename T, std::size_t NDIM>
norm2sq_local()1757     double FunctionImpl<T,NDIM>::norm2sq_local() const {
1758         PROFILE_MEMBER_FUNC(FunctionImpl);
1759         typedef Range<typename dcT::const_iterator> rangeT;
1760         return world.taskq.reduce<double,rangeT,do_norm2sq_local>(rangeT(coeffs.begin(),coeffs.end()),
1761                                                                   do_norm2sq_local());
1762     }
1763 
1764 
1765 
1766 
1767     /// Returns the maximum local depth of the tree ... no communications.
1768     template <typename T, std::size_t NDIM>
max_local_depth()1769     std::size_t FunctionImpl<T,NDIM>::max_local_depth() const {
1770         std::size_t maxdepth = 0;
1771         typename dcT::const_iterator end = coeffs.end();
1772         for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
1773             std::size_t N = (std::size_t) it->first.level();
1774             if (N> maxdepth)
1775                 maxdepth = N;
1776         }
1777         return maxdepth;
1778     }
1779 
1780 
1781     /// Returns the maximum depth of the tree ... collective ... global sum/broadcast
1782     template <typename T, std::size_t NDIM>
max_depth()1783     std::size_t FunctionImpl<T,NDIM>::max_depth() const {
1784         std::size_t maxdepth  = max_local_depth();
1785         world.gop.max(maxdepth);
1786         return maxdepth;
1787     }
1788 
1789     /// Returns the max number of nodes on a processor
1790     template <typename T, std::size_t NDIM>
max_nodes()1791     std::size_t FunctionImpl<T,NDIM>::max_nodes() const {
1792         std::size_t maxsize = 0;
1793         maxsize = coeffs.size();
1794         world.gop.max(maxsize);
1795         return maxsize;
1796     }
1797 
1798     /// Returns the min number of nodes on a processor
1799     template <typename T, std::size_t NDIM>
min_nodes()1800     std::size_t FunctionImpl<T,NDIM>::min_nodes() const {
1801         std::size_t minsize = 0;
1802         minsize = coeffs.size();
1803         world.gop.min(minsize);
1804         return minsize;
1805     }
1806 
1807     /// Returns the size of the tree structure of the function ... collective global sum
1808     template <typename T, std::size_t NDIM>
tree_size()1809     std::size_t FunctionImpl<T,NDIM>::tree_size() const {
1810         std::size_t sum = 0;
1811         sum = coeffs.size();
1812         world.gop.sum(sum);
1813         return sum;
1814     }
1815 
1816     /// Returns the number of coefficients in the function ... collective global sum
1817     template <typename T, std::size_t NDIM>
size()1818     std::size_t FunctionImpl<T,NDIM>::size() const {
1819         std::size_t sum = 0;
1820 #if 1
1821         typename dcT::const_iterator end = coeffs.end();
1822         for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
1823             const nodeT& node = it->second;
1824             if (node.has_coeff())
1825                 sum+=node.size();
1826         }
1827         //            print("proc",world.rank(),sum);
1828 #else
1829         typename dcT::const_iterator end = coeffs.end();
1830         for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
1831             const nodeT& node = it->second;
1832             if (node.has_coeff())
1833                 ++sum;
1834         }
1835         if (is_compressed())
1836             for (std::size_t i=0; i<NDIM; ++i)
1837                 sum *= 2*cdata.k;
1838         else
1839             for (std::size_t i=0; i<NDIM; ++i)
1840                 sum *= cdata.k;
1841 #endif
1842         world.gop.sum(sum);
1843 
1844         return sum;
1845     }
1846 
1847     /// Returns the number of coefficients in the function ... collective global sum
1848     template <typename T, std::size_t NDIM>
real_size()1849     std::size_t FunctionImpl<T,NDIM>::real_size() const {
1850         std::size_t sum = coeffs.size() * (sizeof(keyT) + sizeof(nodeT));
1851         typename dcT::const_iterator end = coeffs.end();
1852         for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
1853             const nodeT& node = it->second;
1854             if (node.has_coeff()) sum+=node.coeff().real_size();
1855         }
1856         world.gop.sum(sum);
1857         return sum;
1858     }
1859 
1860 
1861     /// print tree size and size
1862     template <typename T, std::size_t NDIM>
print_size(const std::string name)1863     void FunctionImpl<T,NDIM>::print_size(const std::string name) const {
1864         const size_t tsize=this->tree_size();
1865         const size_t size=this->size();
1866         const size_t rsize=this->real_size();
1867         const double wall=wall_time();
1868         const double d=sizeof(T);
1869         const double fac=1024*1024*1024;
1870 
1871         double norm=0.0;
1872         {
1873             double local = norm2sq_local();
1874             this->world.gop.sum(local);
1875             this->world.gop.fence();
1876             norm=sqrt(local);
1877         }
1878 
1879         if (this->world.rank()==0) {
1880             printf("%40s at time %.1fs: norm/tree/real/size: %7.5f %zu, %6.3f, %6.3f GByte\n",
1881                    (name.c_str()), wall, norm, tsize,double(rsize)/fac,double(size)/fac*d);
1882         }
1883     }
1884 
1885     /// print the number of configurations per node
1886     template <typename T, std::size_t NDIM>
print_stats()1887     void FunctionImpl<T,NDIM>::print_stats() const {
1888         if (this->targs.tt==TT_FULL) return;
1889         int dim=NDIM/2;
1890         int k0=k;
1891         if (is_compressed()) k0=2*k;
1892         Tensor<long> n(int(std::pow(double(k0),double(dim))+1));
1893         long n_full=0;
1894         long n_large=0;
1895 
1896         if (world.rank()==0) print("n.size(),k0,dim",n.size(),k0,dim);
1897         typename dcT::const_iterator end = coeffs.end();
1898         for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
1899             const nodeT& node = it->second;
1900             if (node.has_coeff()) {
1901                 if (node.coeff().rank()>long(n.size())) {
1902                     ++n_large;
1903                 } else if (node.coeff().rank()==-1) {
1904                     ++n_full;
1905                 } else if (node.coeff().rank()<0) {
1906                     print("small rank",node.coeff().rank());
1907                 } else {
1908                     n[node.coeff().rank()]++;
1909                 }
1910             }
1911         }
1912 
1913         world.gop.sum(n.ptr(), n.size());
1914 
1915         if (world.rank()==0) {
1916             print("configurations     number of nodes");
1917             if (world.rank()==0) print("        full rank    ",n_full);
1918             for (unsigned int i=0; i<n.size(); i++) {
1919                 long m=n[i];
1920                 if (world.rank()==0) print("           ",i,"    ",m);
1921             }
1922             if (world.rank()==0) print("       large rank    ",n_large);
1923         }
1924     }
1925 
1926     template <typename T, std::size_t NDIM>
eval_cube(Level n,coordT & x,const tensorT & c)1927     T FunctionImpl<T,NDIM>::eval_cube(Level n, coordT& x, const tensorT& c) const {
1928         PROFILE_MEMBER_FUNC(FunctionImpl);
1929         const int k = cdata.k;
1930         double px[NDIM][k];
1931         T sum = T(0.0);
1932 
1933         for (std::size_t i=0; i<NDIM; ++i) legendre_scaling_functions(x[i],k,px[i]);
1934 
1935         if (NDIM == 1) {
1936             for (int p=0; p<k; ++p)
1937                 sum += c(p)*px[0][p];
1938         }
1939         else if (NDIM == 2) {
1940             for (int p=0; p<k; ++p)
1941                 for (int q=0; q<k; ++q)
1942                     sum += c(p,q)*px[0][p]*px[1][q];
1943         }
1944         else if (NDIM == 3) {
1945             for (int p=0; p<k; ++p)
1946                 for (int q=0; q<k; ++q)
1947                     for (int r=0; r<k; ++r)
1948                         sum += c(p,q,r)*px[0][p]*px[1][q]*px[2][r];
1949         }
1950         else if (NDIM == 4) {
1951             for (int p=0; p<k; ++p)
1952                 for (int q=0; q<k; ++q)
1953                     for (int r=0; r<k; ++r)
1954                         for (int s=0; s<k; ++s)
1955                             sum += c(p,q,r,s)*px[0][p]*px[1][q]*px[2][r]*px[3][s];
1956         }
1957         else if (NDIM == 5) {
1958             for (int p=0; p<k; ++p)
1959                 for (int q=0; q<k; ++q)
1960                     for (int r=0; r<k; ++r)
1961                         for (int s=0; s<k; ++s)
1962                             for (int t=0; t<k; ++t)
1963                                 sum += c(p,q,r,s,t)*px[0][p]*px[1][q]*px[2][r]*px[3][s]*px[4][t];
1964         }
1965         else if (NDIM == 6) {
1966             for (int p=0; p<k; ++p)
1967                 for (int q=0; q<k; ++q)
1968                     for (int r=0; r<k; ++r)
1969                         for (int s=0; s<k; ++s)
1970                             for (int t=0; t<k; ++t)
1971                                 for (int u=0; u<k; ++u)
1972                                     sum += c(p,q,r,s,t,u)*px[0][p]*px[1][q]*px[2][r]*px[3][s]*px[4][t]*px[5][u];
1973         }
1974         else {
1975             MADNESS_EXCEPTION("FunctionImpl:eval_cube:NDIM?",NDIM);
1976         }
1977         return sum*pow(2.0,0.5*NDIM*n)/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
1978     }
1979 
1980     template <typename T, std::size_t NDIM>
reconstruct_op(const keyT & key,const coeffT & s)1981     void FunctionImpl<T,NDIM>::reconstruct_op(const keyT& key, const coeffT& s) {
1982         //PROFILE_MEMBER_FUNC(FunctionImpl);
1983         // Note that after application of an integral operator not all
1984         // siblings may be present so it is necessary to check existence
1985         // and if absent insert an empty leaf node.
1986         //
1987         // If summing the result of an integral operator (i.e., from
1988         // non-standard form) there will be significant scaling function
1989         // coefficients at all levels and possibly difference coefficients
1990         // in leaves, hence the tree may refine as a result.
1991         typename dcT::iterator it = coeffs.find(key).get();
1992         if (it == coeffs.end()) {
1993             coeffs.replace(key,nodeT(coeffT(),false));
1994             it = coeffs.find(key).get();
1995         }
1996         nodeT& node = it->second;
1997 
1998         // The integral operator will correctly connect interior nodes
1999         // to children but may leave interior nodes without coefficients
2000         // ... but they still need to sum down so just give them zeros
2001         if (node.has_children() && !node.has_coeff()) {
2002             node.set_coeff(coeffT(cdata.v2k,targs));
2003         }
2004 
2005         if (node.has_children() || node.has_coeff()) { // Must allow for inconsistent state from transform, etc.
2006             coeffT d = node.coeff();
2007             if (!d.has_data()) d = coeffT(cdata.v2k,targs);
2008             if (key.level() > 0) d(cdata.s0) += s; // -- note accumulate for NS summation
2009             if (d.dim(0)==2*get_k()) {              // d might be pre-truncated if it's a leaf
2010                 d = unfilter(d);
2011                 node.clear_coeff();
2012                 node.set_has_children(true);
2013                 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2014                     const keyT& child = kit.key();
2015                     coeffT ss = copy(d(child_patch(child)));
2016                     ss.reduce_rank(thresh);
2017                     //PROFILE_BLOCK(recon_send); // Too fine grain for routine profiling
2018                     woT::task(coeffs.owner(child), &implT::reconstruct_op, child, ss);
2019                 }
2020             } else {
2021                 MADNESS_ASSERT(node.is_leaf());
2022                 //                node.coeff()+=s;
2023                 node.coeff().reduce_rank(targs.thresh);
2024             }
2025         }
2026         else {
2027             coeffT ss=s;
2028             if (s.has_no_data()) ss=coeffT(cdata.vk,targs);
2029             if (key.level()) node.set_coeff(copy(ss));
2030             else node.set_coeff(ss);
2031         }
2032     }
2033 
2034     template <typename T, std::size_t NDIM>
fcube(const Key<NDIM> & key,T (* f)(const Vector<double,NDIM> &),const Tensor<double> & qx)2035     Tensor<T> fcube(const Key<NDIM>& key, T (*f)(const Vector<double,NDIM>&), const Tensor<double>& qx) {
2036         //      fcube(key,typename FunctionFactory<T,NDIM>::FunctorInterfaceWrapper(f) , qx, fval);
2037         std::vector<long> npt(NDIM,qx.dim(0));
2038         Tensor<T> fval(npt);
2039         fcube(key,ElementaryInterface<T,NDIM>(f) , qx, fval);
2040         return fval;
2041     }
2042 
2043     template <typename T, std::size_t NDIM>
fcube(const Key<NDIM> & key,const FunctionFunctorInterface<T,NDIM> & f,const Tensor<double> & qx)2044     Tensor<T> fcube(const Key<NDIM>& key, const FunctionFunctorInterface<T,NDIM>& f, const Tensor<double>& qx) {
2045         //      fcube(key,typename FunctionFactory<T,NDIM>::FunctorInterfaceWrapper(f) , qx, fval);
2046         std::vector<long> npt(NDIM,qx.dim(0));
2047         Tensor<T> fval(npt);
2048         fcube(key, f, qx, fval);
2049         return fval;
2050     }
2051 
2052     template <typename T, std::size_t NDIM>
2053     //    void FunctionImpl<T,NDIM>::fcube(const keyT& key, const FunctionFunctorInterface<T,NDIM>& f, const Tensor<double>& qx, tensorT& fval) const {
fcube(const Key<NDIM> & key,const FunctionFunctorInterface<T,NDIM> & f,const Tensor<double> & qx,Tensor<T> & fval)2054     void fcube(const Key<NDIM>& key, const FunctionFunctorInterface<T,NDIM>& f, const Tensor<double>& qx, Tensor<T>& fval) {
2055         //~ template <typename T, std::size_t NDIM> template< typename FF>
2056         //~ void FunctionImpl<T,NDIM>::fcube(const keyT& key, const FF& f, const Tensor<double>& qx, tensorT& fval) const {
2057         typedef Vector<double,NDIM> coordT;
2058         //PROFILE_MEMBER_FUNC(FunctionImpl);
2059         const Vector<Translation,NDIM>& l = key.translation();
2060         const Level n = key.level();
2061         const double h = std::pow(0.5,double(n));
2062         coordT c; // will hold the point in user coordinates
2063         const int npt = qx.dim(0);
2064 
2065         const Tensor<double>& cell_width = FunctionDefaults<NDIM>::get_cell_width();
2066         const Tensor<double>& cell = FunctionDefaults<NDIM>::get_cell();
2067 
2068         // Do pre-screening of the FunctionFunctorInterface, f, before calculating f(r) at quadrature points
2069         coordT c1, c2;
2070         for (std::size_t i = 0; i < NDIM; i++) {
2071           c1[i] = cell(i,0) + h*cell_width[i]*(l[i] + qx((long)0));
2072           c2[i] = cell(i,0) + h*cell_width[i]*(l[i] + qx(npt-1));
2073         }
2074         if (f.screened(c1, c2)) {
2075             fval(___) = 0.0;
2076             return;
2077         }
2078 
2079         Tensor<double> vqx;
2080         bool vectorized = f.supports_vectorized();
2081         if (vectorized) {
2082             T* fvptr = fval.ptr();
2083             if (NDIM == 1) {
2084                 double* x1 = new double[npt];
2085                 int idx = 0;
2086                 for (int i=0; i<npt; ++i, ++idx) {
2087                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2088                     x1[idx] = c[0];
2089                 }
2090                 Vector<double*,1> xvals {x1};
2091                 f(xvals, fvptr, npt);
2092                 delete [] x1;
2093             }
2094             else if (NDIM == 2) {
2095                 double* x1 = new double[npt*npt];
2096                 double* x2 = new double[npt*npt];
2097                 int idx = 0;
2098                 for (int i=0; i<npt; ++i) {
2099                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2100                     for (int j=0; j<npt; ++j, ++idx) {
2101                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2102                         x1[idx] = c[0];
2103                         x2[idx] = c[1];
2104                     }
2105                 }
2106                 Vector<double*,2> xvals {x1, x2};
2107                 f(xvals, fvptr, npt*npt);
2108                 delete [] x1;
2109                 delete [] x2;
2110             }
2111             else if (NDIM == 3) {
2112                 double* x1 = new double[npt*npt*npt];
2113                 double* x2 = new double[npt*npt*npt];
2114                 double* x3 = new double[npt*npt*npt];
2115                 int idx = 0;
2116                 for (int i=0; i<npt; ++i) {
2117                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2118                     for (int j=0; j<npt; ++j) {
2119                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2120                         for (int k=0; k<npt; ++k, ++idx) {
2121                             c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2122                             x1[idx] = c[0];
2123                             x2[idx] = c[1];
2124                             x3[idx] = c[2];
2125                         }
2126                     }
2127                 }
2128                 Vector<double*,3> xvals {x1, x2, x3};
2129                 f(xvals, fvptr, npt*npt*npt);
2130                 delete [] x1;
2131                 delete [] x2;
2132                 delete [] x3;
2133             }
2134             else if (NDIM == 4) {
2135                 double* x1 = new double[npt*npt*npt*npt];
2136                 double* x2 = new double[npt*npt*npt*npt];
2137                 double* x3 = new double[npt*npt*npt*npt];
2138                 double* x4 = new double[npt*npt*npt*npt];
2139                 int idx = 0;
2140                 for (int i=0; i<npt; ++i) {
2141                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2142                     for (int j=0; j<npt; ++j) {
2143                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2144                         for (int k=0; k<npt; ++k) {
2145                             c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2146                             for (int m=0; m<npt; ++m, ++idx) {
2147                                 c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx
2148                                 x1[idx] = c[0];
2149                                 x2[idx] = c[1];
2150                                 x3[idx] = c[2];
2151                                 x4[idx] = c[3];
2152                             }
2153                         }
2154                     }
2155                 }
2156                 Vector<double*,4> xvals {x1, x2, x3, x4};
2157                 f(xvals, fvptr, npt*npt*npt*npt);
2158                 delete [] x1;
2159                 delete [] x2;
2160                 delete [] x3;
2161                 delete [] x4;
2162             }
2163             else if (NDIM == 5) {
2164                 double* x1 = new double[npt*npt*npt*npt*npt];
2165                 double* x2 = new double[npt*npt*npt*npt*npt];
2166                 double* x3 = new double[npt*npt*npt*npt*npt];
2167                 double* x4 = new double[npt*npt*npt*npt*npt];
2168                 double* x5 = new double[npt*npt*npt*npt*npt];
2169                 int idx = 0;
2170                 for (int i=0; i<npt; ++i) {
2171                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2172                     for (int j=0; j<npt; ++j) {
2173                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2174                         for (int k=0; k<npt; ++k) {
2175                             c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2176                             for (int m=0; m<npt; ++m) {
2177                                 c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx
2178                                 for (int n=0; n<npt; ++n, ++idx) {
2179                                     c[4] = cell(4,0) + h*cell_width[4]*(l[4] + qx(n)); // yy
2180                                     x1[idx] = c[0];
2181                                     x2[idx] = c[1];
2182                                     x3[idx] = c[2];
2183                                     x4[idx] = c[3];
2184                                     x5[idx] = c[4];
2185                                 }
2186                             }
2187                         }
2188                     }
2189                 }
2190                 Vector<double*,5> xvals {x1, x2, x3, x4, x5};
2191                 f(xvals, fvptr, npt*npt*npt*npt*npt);
2192                 delete [] x1;
2193                 delete [] x2;
2194                 delete [] x3;
2195                 delete [] x4;
2196                 delete [] x5;
2197             }
2198             else if (NDIM == 6) {
2199                 double* x1 = new double[npt*npt*npt*npt*npt*npt];
2200                 double* x2 = new double[npt*npt*npt*npt*npt*npt];
2201                 double* x3 = new double[npt*npt*npt*npt*npt*npt];
2202                 double* x4 = new double[npt*npt*npt*npt*npt*npt];
2203                 double* x5 = new double[npt*npt*npt*npt*npt*npt];
2204                 double* x6 = new double[npt*npt*npt*npt*npt*npt];
2205                 int idx = 0;
2206                 for (int i=0; i<npt; ++i) {
2207                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2208                     for (int j=0; j<npt; ++j) {
2209                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2210                         for (int k=0; k<npt; ++k) {
2211                             c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2212                             for (int m=0; m<npt; ++m) {
2213                                 c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx
2214                                 for (int n=0; n<npt; ++n) {
2215                                     c[4] = cell(4,0) + h*cell_width[4]*(l[4] + qx(n)); // yy
2216                                     for (int p=0; p<npt; ++p, ++idx) {
2217                                         c[5] = cell(5,0) + h*cell_width[5]*(l[5] + qx(p)); // zz
2218                                         x1[idx] = c[0];
2219                                         x2[idx] = c[1];
2220                                         x3[idx] = c[2];
2221                                         x4[idx] = c[3];
2222                                         x5[idx] = c[4];
2223                                         x6[idx] = c[5];
2224                                     }
2225                                 }
2226                             }
2227                         }
2228                     }
2229                 }
2230                 Vector<double*,6> xvals {x1, x2, x3, x4, x5, x6};
2231                 f(xvals, fvptr, npt*npt*npt*npt*npt*npt);
2232                 delete [] x1;
2233                 delete [] x2;
2234                 delete [] x3;
2235                 delete [] x4;
2236                 delete [] x5;
2237                 delete [] x6;
2238             }
2239             else {
2240                 MADNESS_EXCEPTION("FunctionImpl: fcube: confused about NDIM?",NDIM);
2241             }
2242         }
2243         else {
2244             if (NDIM == 1) {
2245                 for (int i=0; i<npt; ++i) {
2246                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2247                     fval(i) = f(c);
2248                     MADNESS_ASSERT(!std::isnan(fval(i)));
2249                 }
2250             }
2251             else if (NDIM == 2) {
2252                 for (int i=0; i<npt; ++i) {
2253                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2254                     for (int j=0; j<npt; ++j) {
2255                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2256                         fval(i,j) = f(c);
2257                         MADNESS_ASSERT(!std::isnan(fval(i,j)));
2258                     }
2259                 }
2260             }
2261             else if (NDIM == 3) {
2262                 for (int i=0; i<npt; ++i) {
2263                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2264                     for (int j=0; j<npt; ++j) {
2265                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2266                         for (int k=0; k<npt; ++k) {
2267                             c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2268                             fval(i,j,k) = f(c);
2269                             MADNESS_ASSERT(!std::isnan(fval(i,j,k)));
2270                         }
2271                     }
2272                 }
2273             }
2274             else if (NDIM == 4) {
2275                 for (int i=0; i<npt; ++i) {
2276                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2277                     for (int j=0; j<npt; ++j) {
2278                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2279                         for (int k=0; k<npt; ++k) {
2280                             c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2281                             for (int m=0; m<npt; ++m) {
2282                                 c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx
2283                                 fval(i,j,k,m) = f(c);
2284                                 MADNESS_ASSERT(!std::isnan(fval(i,j,k,m)));
2285                             }
2286                         }
2287                     }
2288                 }
2289             }
2290             else if (NDIM == 5) {
2291                 for (int i=0; i<npt; ++i) {
2292                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2293                     for (int j=0; j<npt; ++j) {
2294                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2295                         for (int k=0; k<npt; ++k) {
2296                             c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2297                             for (int m=0; m<npt; ++m) {
2298                                 c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx
2299                                 for (int n=0; n<npt; ++n) {
2300                                     c[4] = cell(4,0) + h*cell_width[4]*(l[4] + qx(n)); // yy
2301                                     fval(i,j,k,m,n) = f(c);
2302                                     MADNESS_ASSERT(!std::isnan(fval(i,j,k,m,n)));
2303                                 }
2304                             }
2305                         }
2306                     }
2307                 }
2308             }
2309             else if (NDIM == 6) {
2310                 for (int i=0; i<npt; ++i) {
2311                     c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2312                     for (int j=0; j<npt; ++j) {
2313                         c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2314                         for (int k=0; k<npt; ++k) {
2315                             c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2316                             for (int m=0; m<npt; ++m) {
2317                                 c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx
2318                                 for (int n=0; n<npt; ++n) {
2319                                     c[4] = cell(4,0) + h*cell_width[4]*(l[4] + qx(n)); // yy
2320                                     for (int p=0; p<npt; ++p) {
2321                                         c[5] = cell(5,0) + h*cell_width[5]*(l[5] + qx(p)); // zz
2322                                         fval(i,j,k,m,n,p) = f(c);
2323                                         MADNESS_ASSERT(!std::isnan(fval(i,j,k,m,n,p)));
2324                                     }
2325                                 }
2326                             }
2327                         }
2328                     }
2329                 }
2330             }
2331             else {
2332                 MADNESS_EXCEPTION("FunctionImpl: fcube: confused about NDIM?",NDIM);
2333             }
2334         }
2335     }
2336 
2337     template <typename T, std::size_t NDIM>
fcube(const keyT & key,T (* f)(const coordT &),const Tensor<double> & qx,tensorT & fval)2338     void FunctionImpl<T,NDIM>::fcube(const keyT& key, T (*f)(const coordT&), const Tensor<double>& qx, tensorT& fval) const {
2339         //      fcube(key,typename FunctionFactory<T,NDIM>::FunctorInterfaceWrapper(f) , qx, fval);
2340         madness::fcube(key,ElementaryInterface<T,NDIM>(f) , qx, fval);
2341     }
2342 
2343     template <typename T, std::size_t NDIM>
fcube(const keyT & key,const FunctionFunctorInterface<T,NDIM> & f,const Tensor<double> & qx,tensorT & fval)2344     void FunctionImpl<T,NDIM>::fcube(const keyT& key, const FunctionFunctorInterface<T,NDIM>& f, const Tensor<double>& qx, tensorT& fval) const {
2345         madness::fcube(key,f,qx,fval);
2346     }
2347 
2348 
2349     /// project the functor into this functionimpl, and "return" a tree in reconstructed,
2350     /// rank-reduced form.
2351 
2352     /// @param[in]  key current FunctionNode
2353     /// @param[in]  do_refine
2354     /// @param[in]  specialpts  in case these are very spiky functions -- don't undersample
2355     template <typename T, std::size_t NDIM>
project_refine_op(const keyT & key,bool do_refine,const std::vector<Vector<double,NDIM>> & specialpts)2356     void FunctionImpl<T,NDIM>::project_refine_op(const keyT& key,
2357                                                  bool do_refine,
2358                                                  const std::vector<Vector<double,NDIM> >& specialpts) {
2359         //PROFILE_MEMBER_FUNC(FunctionImpl);
2360         if (do_refine && key.level() < max_refine_level) {
2361 
2362             // Restrict special points to this box
2363             std::vector<Vector<double,NDIM> > newspecialpts;
2364             if (key.level() < functor->special_level() && specialpts.size() > 0) {
2365                 BoundaryConditions<NDIM> bc = FunctionDefaults<NDIM>::get_bc();
2366                 std::vector<bool> bperiodic = bc.is_periodic();
2367                 for (unsigned int i = 0; i < specialpts.size(); ++i) {
2368                     coordT simpt;
2369                     user_to_sim(specialpts[i], simpt);
2370                     Key<NDIM> specialkey = simpt2key(simpt, key.level());
2371                     if (specialkey.is_neighbor_of(key,bperiodic)) {
2372                         newspecialpts.push_back(specialpts[i]);
2373                     }
2374                 }
2375             }
2376 
2377             // If refining compute scaling function coefficients and
2378             // norm of difference coefficients
2379             tensorT r, s0;
2380             double dnorm = 0.0;
2381             //////////////////////////if (newspecialpts.size() == 0)
2382             {
2383                 // Make in r child scaling function coeffs at level n+1
2384                 r = tensorT(cdata.v2k);
2385                 for (KeyChildIterator<NDIM> it(key); it; ++it) {
2386                     const keyT& child = it.key();
2387                     r(child_patch(child)) = project(child);
2388                 }
2389                 // Filter then test difference coeffs at level n
2390                 tensorT d = filter(r);
2391                 if (truncate_on_project) s0 = copy(d(cdata.s0));
2392                 d(cdata.s0) = T(0);
2393                 dnorm = d.normf();
2394             }
2395 
2396             // If have special points always refine.  If don't have special points
2397             // refine if difference norm is big
2398             if (newspecialpts.size() > 0 || dnorm >=truncate_tol(thresh,key.level())) {
2399                 coeffs.replace(key,nodeT(coeffT(),true)); // Insert empty node for parent
2400                 for (KeyChildIterator<NDIM> it(key); it; ++it) {
2401                     const keyT& child = it.key();
2402                     ProcessID p;
2403                     if (FunctionDefaults<NDIM>::get_project_randomize()) {
2404                         p = world.random_proc();
2405                     }
2406                     else {
2407                         p = coeffs.owner(child);
2408                     }
2409                     //PROFILE_BLOCK(proj_refine_send); // Too fine grain for routine profiling
2410                     woT::task(p, &implT::project_refine_op, child, do_refine, newspecialpts);
2411                 }
2412             }
2413             else {
2414                 if (truncate_on_project) {
2415                     coeffT s(s0,thresh,FunctionDefaults<NDIM>::get_tensor_type());
2416                     coeffs.replace(key,nodeT(s,false));
2417                 }
2418                 else {
2419                     coeffs.replace(key,nodeT(coeffT(),true)); // Insert empty node for parent
2420                     for (KeyChildIterator<NDIM> it(key); it; ++it) {
2421                         const keyT& child = it.key();
2422                         coeffT s(r(child_patch(child)),thresh,FunctionDefaults<NDIM>::get_tensor_type());
2423                         coeffs.replace(child,nodeT(s,false));
2424                     }
2425                 }
2426             }
2427         }
2428         else {
2429             coeffs.replace(key,nodeT(coeffT(project(key),targs),false));
2430         }
2431     }
2432 
2433     template <typename T, std::size_t NDIM>
add_scalar_inplace(T t,bool fence)2434     void FunctionImpl<T,NDIM>::add_scalar_inplace(T t, bool fence) {
2435         std::vector<long> v0(NDIM,0L);
2436         std::vector<long> v1(NDIM,1L);
2437         std::vector<Slice> s(NDIM,Slice(0,0));
2438         const TensorArgs full_args(-1.0,TT_FULL);
2439         if (is_compressed()) {
2440             if (world.rank() == coeffs.owner(cdata.key0)) {
2441                 typename dcT::iterator it = coeffs.find(cdata.key0).get();
2442                 MADNESS_ASSERT(it != coeffs.end());
2443                 nodeT& node = it->second;
2444                 MADNESS_ASSERT(node.has_coeff());
2445                 //                node.node_to_full_rank();
2446                 //                node.full_tensor_reference()(v0) += t*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
2447                 //                node.node_to_low_rank();
2448                 change_tensor_type(node.coeff(),full_args);
2449                 node.coeff().full_tensor()(v0) += t*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
2450                 change_tensor_type(node.coeff(),targs);
2451             }
2452         }
2453         else {
2454             for (typename dcT::iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
2455                 Level n = it->first.level();
2456                 nodeT& node = it->second;
2457                 if (node.has_coeff()) {
2458                     // this looks funny, but is necessary for GenTensor, since you can't access a
2459                     // single matrix element. Therefore make a (1^NDIM) tensor, convert to GenTensor, then
2460                     // add to the original one by adding a slice.
2461                     tensorT ttt(v1);
2462                     ttt=t*sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*n)));
2463                     coeffT tt(ttt,get_tensor_args());
2464                     node.coeff()(s) += tt;
2465                     // this was the original line:
2466                     // node.coeff().full_tensor()(v0) += t*sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*n)));
2467 
2468                 }
2469             }
2470         }
2471         if (fence) world.gop.fence();
2472     }
2473 
2474     template <typename T, std::size_t NDIM>
insert_zero_down_to_initial_level(const keyT & key)2475     void FunctionImpl<T,NDIM>::insert_zero_down_to_initial_level(const keyT& key) {
2476         PROFILE_MEMBER_FUNC(FunctionImpl);
2477         if (compressed) initial_level = std::max(initial_level,1); // Otherwise zero function is confused
2478         if (coeffs.is_local(key)) {
2479             if (compressed) {
2480                 if (key.level() == initial_level) {
2481                     coeffs.replace(key, nodeT(coeffT(), false));
2482                 }
2483                 else {
2484                     coeffs.replace(key, nodeT(coeffT(cdata.v2k,targs), true));
2485                 }
2486             }
2487             else {
2488                 if (key.level()<initial_level) {
2489                     coeffs.replace(key, nodeT(coeffT(), true));
2490                 }
2491                 else {
2492                     coeffs.replace(key, nodeT(coeffT(cdata.vk,targs), false));
2493                 }
2494             }
2495         }
2496         if (key.level() < initial_level) {
2497             for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2498                 insert_zero_down_to_initial_level(kit.key());
2499             }
2500         }
2501 
2502     }
2503 
2504 
2505     template <typename T, std::size_t NDIM>
truncate_spawn(const keyT & key,double tol)2506     Future<bool> FunctionImpl<T,NDIM>::truncate_spawn(const keyT& key, double tol) {
2507         //PROFILE_MEMBER_FUNC(FunctionImpl);
2508         typename dcT::iterator it = coeffs.find(key).get();
2509         if (it == coeffs.end()) {
2510             // In a standard tree all children would exist but some ops (transform)
2511             // can leave the tree in a messy state.  Just make the missing node as an
2512             // empty leaf.
2513             coeffs.replace(key,nodeT());
2514             it = coeffs.find(key).get();
2515         }
2516         nodeT& node = it->second;
2517         if (node.has_children()) {
2518             std::vector< Future<bool> > v = future_vector_factory<bool>(1<<NDIM);
2519             int i=0;
2520             for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
2521                 v[i] = woT::task(coeffs.owner(kit.key()), &implT::truncate_spawn, kit.key(), tol, TaskAttributes::generator());
2522             }
2523             return woT::task(world.rank(),&implT::truncate_op, key, tol, v);
2524         }
2525         else {
2526             // In compressed form leaves should not have coeffs ... however the
2527             // transform op could leave the tree with leaves that do have coeffs
2528             // in which case we want something sensible to happen
2529             //MADNESS_ASSERT(!node.has_coeff());
2530             if (node.has_coeff() && key.level()>1) {
2531                 double dnorm = node.coeff().normf();
2532                 if (dnorm < truncate_tol(tol,key)) {
2533                     node.clear_coeff();
2534                 }
2535             }
2536             return Future<bool>(node.has_coeff());
2537         }
2538     }
2539 
2540 
2541     template <typename T, std::size_t NDIM>
truncate_op(const keyT & key,double tol,const std::vector<Future<bool>> & v)2542     bool FunctionImpl<T,NDIM>::truncate_op(const keyT& key, double tol, const std::vector< Future<bool> >& v) {
2543         //PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
2544         // If any child has coefficients, a parent cannot truncate
2545         for (int i=0; i<(1<<NDIM); ++i) if (v[i].get()) return true;
2546         nodeT& node = coeffs.find(key).get()->second;
2547 
2548         // Interior nodes should always have coeffs but transform might
2549         // leave empty interior nodes ... hence just force no coeffs to
2550         // be zero coeff unless it is a leaf.
2551         if (node.has_children() && !node.has_coeff()) node.set_coeff(coeffT(cdata.v2k,targs));
2552 
2553         if (key.level() > 1) { // >1 rather >0 otherwise reconstruct might get confused
2554             double dnorm = node.coeff().normf();
2555             if (dnorm < truncate_tol(tol,key)) {
2556                 node.clear_coeff();
2557                 if (node.has_children()) {
2558                     node.set_has_children(false);
2559                     for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2560                         coeffs.erase(kit.key());
2561                     }
2562                 }
2563             }
2564         }
2565         return node.has_coeff();
2566     }
2567 
2568 
2569     template <typename T, std::size_t NDIM>
print_tree(std::ostream & os,Level maxlevel)2570     void FunctionImpl<T,NDIM>::print_tree(std::ostream& os, Level maxlevel) const {
2571         if (world.rank() == 0) do_print_tree(cdata.key0, os, maxlevel);
2572         world.gop.fence();
2573         if (world.rank() == 0) os.flush();
2574         world.gop.fence();
2575     }
2576 
2577 
2578     template <typename T, std::size_t NDIM>
do_print_tree(const keyT & key,std::ostream & os,Level maxlevel)2579     void FunctionImpl<T,NDIM>::do_print_tree(const keyT& key, std::ostream& os, Level maxlevel) const {
2580         typename dcT::const_iterator it = coeffs.find(key).get();
2581         if (it == coeffs.end()) {
2582             //MADNESS_EXCEPTION("FunctionImpl: do_print_tree: null node pointer",0);
2583             for (int i=0; i<key.level(); ++i) os << "  ";
2584             os << key << "  missing --> " << coeffs.owner(key) << "\n";
2585         }
2586         else {
2587             const nodeT& node = it->second;
2588             for (int i=0; i<key.level(); ++i) os << "  ";
2589             os << key << "  " << node << " --> " << coeffs.owner(key) << "\n";
2590             if (key.level() < maxlevel  &&  node.has_children()) {
2591                 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2592                     do_print_tree(kit.key(),os,maxlevel);
2593                 }
2594             }
2595         }
2596     }
2597 
2598     template <typename T, std::size_t NDIM>
print_tree_graphviz(std::ostream & os,Level maxlevel)2599     void FunctionImpl<T,NDIM>::print_tree_graphviz(std::ostream& os, Level maxlevel) const {
2600         if (world.rank() == 0) do_print_tree_graphviz(cdata.key0, os, maxlevel);
2601         world.gop.fence();
2602         if (world.rank() == 0) os.flush();
2603         world.gop.fence();
2604     }
2605 
2606     template <typename T, std::size_t NDIM>
do_print_tree_graphviz(const keyT & key,std::ostream & os,Level maxlevel)2607     void FunctionImpl<T,NDIM>::do_print_tree_graphviz(const keyT& key, std::ostream& os, Level maxlevel) const {
2608 
2609         struct uniqhash {
2610             static int64_t value(const keyT& key) {
2611                 int64_t result = 0;
2612                 for (int64_t j = 0; j <= key.level()-1; ++j) {
2613                     result += (1 << j*NDIM);
2614                 }
2615                 result += key.translation()[0];
2616                 return result;
2617             }
2618         };
2619 
2620         typename dcT::const_iterator it = coeffs.find(key).get();
2621         if (it != coeffs.end()) {
2622             const nodeT& node = it->second;
2623             if (key.level() < maxlevel  &&  node.has_children()) {
2624                 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2625                     os << uniqhash::value(key) << " -> " << uniqhash::value(kit.key()) << "\n";
2626                     do_print_tree_graphviz(kit.key(),os,maxlevel);
2627                 }
2628             }
2629         }
2630     }
2631 
2632     template <typename T, std::size_t NDIM>
project(const keyT & key)2633     Tensor<T> FunctionImpl<T,NDIM>::project(const keyT& key) const {
2634         //PROFILE_MEMBER_FUNC(FunctionImpl);
2635 
2636         if (not functor) MADNESS_EXCEPTION("FunctionImpl: project: confusion about function?",0);
2637 
2638         // if functor provides coeffs directly, awesome; otherwise use compute by yourself
2639         if (functor->provides_coeff()) return functor->coeff(key).full_tensor_copy();
2640 
2641         MADNESS_ASSERT(cdata.npt == cdata.k); // only necessary due to use of fast transform
2642         tensorT fval(cdata.vq,false); // this will be the returned result
2643         tensorT work(cdata.vk,false); // initially evaluate the function in here
2644         tensorT workq(cdata.vq,false); // initially evaluate the function in here
2645 
2646         // compute the values of the functor at the quadrature points and scale appropriately
2647         madness::fcube(key,*functor,cdata.quad_x,work);
2648         work.scale(sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*key.level()))));
2649         //return transform(work,cdata.quad_phiw);
2650         return fast_transform(work,cdata.quad_phiw,fval,workq);
2651     }
2652 
2653     template <typename T, std::size_t NDIM>
get_norm_tree_recursive(const keyT & key)2654     Future<double> FunctionImpl<T,NDIM>::get_norm_tree_recursive(const keyT& key) const {
2655         if (coeffs.probe(key)) {
2656             return Future<double>(coeffs.find(key).get()->second.get_norm_tree());
2657         }
2658         MADNESS_ASSERT(key.level());
2659         keyT parent = key.parent();
2660         return woT::task(coeffs.owner(parent), &implT::get_norm_tree_recursive, parent, TaskAttributes::hipri());
2661     }
2662 
2663 
2664     template <typename T, std::size_t NDIM>
sock_it_to_me(const keyT & key,const RemoteReference<FutureImpl<std::pair<keyT,coeffT>>> & ref)2665     void FunctionImpl<T,NDIM>::sock_it_to_me(const keyT& key,
2666                                              const RemoteReference< FutureImpl< std::pair<keyT,coeffT> > >& ref) const {
2667         //PROFILE_MEMBER_FUNC(FunctionImpl);
2668         if (coeffs.probe(key)) {
2669             const nodeT& node = coeffs.find(key).get()->second;
2670             Future< std::pair<keyT,coeffT> > result(ref);
2671             if (node.has_coeff()) {
2672                 //madness::print("sock found it with coeff",key);
2673                 result.set(std::pair<keyT,coeffT>(key,node.coeff()));
2674             }
2675             else {
2676                 //madness::print("sock found it without coeff",key);
2677                 result.set(std::pair<keyT,coeffT>(key,coeffT()));
2678             }
2679         }
2680         else {
2681             keyT parent = key.parent();
2682             //madness::print("sock forwarding to parent",key,parent);
2683             //PROFILE_BLOCK(sitome_send); // Too fine grain for routine profiling
2684 	    if (coeffs.is_local(parent))
2685 	      woT::send(coeffs.owner(parent), &FunctionImpl<T,NDIM>::sock_it_to_me, parent, ref);
2686 	    else
2687 	      woT::task(coeffs.owner(parent), &FunctionImpl<T,NDIM>::sock_it_to_me, parent, ref, TaskAttributes::hipri());
2688         }
2689     }
2690 
2691     // like sock_it_to_me, but it replaces empty node with averaged coeffs from further down the tree
2692     template <typename T, std::size_t NDIM>
sock_it_to_me_too(const keyT & key,const RemoteReference<FutureImpl<std::pair<keyT,coeffT>>> & ref)2693     void FunctionImpl<T,NDIM>::sock_it_to_me_too(const keyT& key,
2694                                                  const RemoteReference< FutureImpl< std::pair<keyT,coeffT> > >& ref) const {
2695         PROFILE_MEMBER_FUNC(FunctionImpl);
2696         if (coeffs.probe(key)) {
2697             const nodeT& node = coeffs.find(key).get()->second;
2698             Future< std::pair<keyT,coeffT> > result(ref);
2699             if (node.has_coeff()) {
2700                 result.set(std::pair<keyT,coeffT>(key,node.coeff()));
2701             }
2702             else {
2703                 result.set(std::pair<keyT,coeffT>(key,nodeT(coeffT(project(key),targs),false).coeff()));
2704             }
2705         }
2706         else {
2707             keyT parent = key.parent();
2708             //PROFILE_BLOCK(sitome2_send); // Too fine grain for routine profiling
2709             woT::task(coeffs.owner(parent), &FunctionImpl<T,NDIM>::sock_it_to_me_too, parent, ref, TaskAttributes::hipri());
2710         }
2711     }
2712 
2713 
2714     template <typename T, std::size_t NDIM>
eval(const Vector<double,NDIM> & xin,const keyT & keyin,const typename Future<T>::remote_refT & ref)2715     void FunctionImpl<T,NDIM>::eval(const Vector<double,NDIM>& xin,
2716                                     const keyT& keyin,
2717                                     const typename Future<T>::remote_refT& ref) {
2718 
2719         PROFILE_MEMBER_FUNC(FunctionImpl);
2720         // This is ugly.  We must figure out a clean way to use
2721         // owner computes rule from the container.
2722         Vector<double,NDIM> x = xin;
2723         keyT key = keyin;
2724         Vector<Translation,NDIM> l = key.translation();
2725         ProcessID me = world.rank();
2726         while (1) {
2727             ProcessID owner = coeffs.owner(key);
2728             if (owner != me) {
2729                 //PROFILE_BLOCK(eval_send); // Too fine grain for routine profiling
2730                 woT::task(owner, &implT::eval, x, key, ref, TaskAttributes::hipri());
2731                 return;
2732             }
2733             else {
2734                 typename dcT::futureT fut = coeffs.find(key);
2735                 typename dcT::iterator it = fut.get();
2736                 nodeT& node = it->second;
2737                 if (node.has_coeff()) {
2738                     Future<T>(ref).set(eval_cube(key.level(), x, node.coeff().full_tensor_copy()));
2739                     return;
2740                 }
2741                 else {
2742                     for (std::size_t i=0; i<NDIM; ++i) {
2743                         double xi = x[i]*2.0;
2744                         int li = int(xi);
2745                         if (li == 2) li = 1;
2746                         x[i] = xi - li;
2747                         l[i] = 2*l[i] + li;
2748                     }
2749                     key = keyT(key.level()+1,l);
2750                 }
2751             }
2752         }
2753         //MADNESS_EXCEPTION("should not be here",0);
2754     }
2755 
2756 
2757     template <typename T, std::size_t NDIM>
2758     std::pair<bool,T>
eval_local_only(const Vector<double,NDIM> & xin,Level maxlevel)2759     FunctionImpl<T,NDIM>::eval_local_only(const Vector<double,NDIM>& xin, Level maxlevel) {
2760         Vector<double,NDIM> x = xin;
2761         keyT key(0);
2762         Vector<Translation,NDIM> l = key.translation();
2763         const ProcessID me = world.rank();
2764         while (key.level() <= maxlevel) {
2765             if (coeffs.owner(key) == me) {
2766                 typename dcT::futureT fut = coeffs.find(key);
2767                 typename dcT::iterator it = fut.get();
2768                 if (it != coeffs.end()) {
2769                     nodeT& node = it->second;
2770                     if (node.has_coeff()) {
2771                         return std::pair<bool,T>(true,eval_cube(key.level(), x, node.coeff().full_tensor_copy()));
2772                     }
2773                 }
2774             }
2775             for (std::size_t i=0; i<NDIM; ++i) {
2776                 double xi = x[i]*2.0;
2777                 int li = int(xi);
2778                 if (li == 2) li = 1;
2779                 x[i] = xi - li;
2780                 l[i] = 2*l[i] + li;
2781             }
2782             key = keyT(key.level()+1,l);
2783         }
2784         return std::pair<bool,T>(false,0.0);
2785     }
2786 
2787     template <typename T, std::size_t NDIM>
evaldepthpt(const Vector<double,NDIM> & xin,const keyT & keyin,const typename Future<Level>::remote_refT & ref)2788     void FunctionImpl<T,NDIM>::evaldepthpt(const Vector<double,NDIM>& xin,
2789                                            const keyT& keyin,
2790                                            const typename Future<Level>::remote_refT& ref) {
2791 
2792         PROFILE_MEMBER_FUNC(FunctionImpl);
2793         // This is ugly.  We must figure out a clean way to use
2794         // owner computes rule from the container.
2795         Vector<double,NDIM> x = xin;
2796         keyT key = keyin;
2797         Vector<Translation,NDIM> l = key.translation();
2798         ProcessID me = world.rank();
2799         while (1) {
2800             ProcessID owner = coeffs.owner(key);
2801             if (owner != me) {
2802                 //PROFILE_BLOCK(eval_send); // Too fine grain for routine profiling
2803                 woT::task(owner, &implT::evaldepthpt, x, key, ref, TaskAttributes::hipri());
2804                 return;
2805             }
2806             else {
2807                 typename dcT::futureT fut = coeffs.find(key);
2808                 typename dcT::iterator it = fut.get();
2809                 nodeT& node = it->second;
2810                 if (node.has_coeff()) {
2811                     Future<Level>(ref).set(key.level());
2812                     return;
2813                 }
2814                 else {
2815                     for (std::size_t i=0; i<NDIM; ++i) {
2816                         double xi = x[i]*2.0;
2817                         int li = int(xi);
2818                         if (li == 2) li = 1;
2819                         x[i] = xi - li;
2820                         l[i] = 2*l[i] + li;
2821                     }
2822                     key = keyT(key.level()+1,l);
2823                 }
2824             }
2825         }
2826         //MADNESS_EXCEPTION("should not be here",0);
2827     }
2828 
2829     template <typename T, std::size_t NDIM>
evalR(const Vector<double,NDIM> & xin,const keyT & keyin,const typename Future<long>::remote_refT & ref)2830     void FunctionImpl<T,NDIM>::evalR(const Vector<double,NDIM>& xin,
2831                                      const keyT& keyin,
2832                                      const typename Future<long>::remote_refT& ref) {
2833 
2834     	PROFILE_MEMBER_FUNC(FunctionImpl);
2835     	// This is ugly.  We must figure out a clean way to use
2836     	// owner computes rule from the container.
2837     	Vector<double,NDIM> x = xin;
2838     	keyT key = keyin;
2839     	Vector<Translation,NDIM> l = key.translation();
2840     	ProcessID me = world.rank();
2841     	while (1) {
2842             ProcessID owner = coeffs.owner(key);
2843             if (owner != me) {
2844                 //PROFILE_BLOCK(eval_send); // Too fine grain for routine profiling
2845                 woT::task(owner, &implT::evalR, x, key, ref, TaskAttributes::hipri());
2846                 return;
2847             }
2848             else {
2849                 typename dcT::futureT fut = coeffs.find(key);
2850                 typename dcT::iterator it = fut.get();
2851                 nodeT& node = it->second;
2852                 if (node.has_coeff()) {
2853                     Future<long>(ref).set(node.coeff().rank());
2854                     return;
2855                 }
2856                 else {
2857                     for (std::size_t i=0; i<NDIM; ++i) {
2858                         double xi = x[i]*2.0;
2859                         int li = int(xi);
2860                         if (li == 2) li = 1;
2861                         x[i] = xi - li;
2862                         l[i] = 2*l[i] + li;
2863                     }
2864                     key = keyT(key.level()+1,l);
2865                 }
2866             }
2867     	}
2868     	//MADNESS_EXCEPTION("should not be here",0);
2869     }
2870 
2871 
2872     template <typename T, std::size_t NDIM>
tnorm(const tensorT & t,double * lo,double * hi)2873     void FunctionImpl<T,NDIM>::tnorm(const tensorT& t, double* lo, double* hi) const {
2874         //PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
2875         // Chosen approach looks stupid but it is more accurate
2876         // than the simple approach of summing everything and
2877         // subtracting off the low-order stuff to get the high
2878         // order (assuming the high-order stuff is small relative
2879         // to the low-order)
2880         tensorT work = copy(t);
2881         tensorT tlo = work(cdata.sh);
2882         *lo = tlo.normf();
2883         tlo.fill(0.0);
2884         *hi = work.normf();
2885     }
2886 
2887     namespace detail {
2888         template <typename A, typename B>
2889         struct noop {
operatornoop2890             void operator()(const A& a, const B& b) const {};
2891 
serializenoop2892             template <typename Archive> void serialize(Archive& ar) {}
2893         };
2894 
2895         template <typename T, std::size_t NDIM>
2896         struct scaleinplace {
2897             T q;
scaleinplacescaleinplace2898             scaleinplace() {}
2899 	    // G++ 4.1.2 ICEs on BGP ... scaleinplace(T q) : q(q) {}
scaleinplacescaleinplace2900             scaleinplace(T q) {this->q = q;}
operatorscaleinplace2901             void operator()(const Key<NDIM>& key, Tensor<T>& t) const {
2902                 t.scale(q);
2903             }
operatorscaleinplace2904             void operator()(const Key<NDIM>& key, FunctionNode<T,NDIM>& node) const {
2905                 node.coeff().scale(q);
2906             }
serializescaleinplace2907             template <typename Archive> void serialize(Archive& ar) {
2908                 ar & q;
2909             }
2910         };
2911 
2912         template <typename T, std::size_t NDIM>
2913         struct squareinplace {
operatorsquareinplace2914             void operator()(const Key<NDIM>& key, Tensor<T>& t) const {
2915                 t.emul(t);
2916             }
serializesquareinplace2917             template <typename Archive> void serialize(Archive& ar) {}
2918         };
2919 
2920         template <typename T, std::size_t NDIM>
2921         struct absinplace {
operatorabsinplace2922             void operator()(const Key<NDIM>& key, Tensor<T>& t) const {t=abs(t);}
serializeabsinplace2923             template <typename Archive> void serialize(Archive& ar) {}
2924         };
2925 
2926         template <typename T, std::size_t NDIM>
2927         struct abssquareinplace {
operatorabssquareinplace2928             void operator()(const Key<NDIM>& key, Tensor<T>& t) const {abs(t.emul(t));}
serializeabssquareinplace2929             template <typename Archive> void serialize(Archive& ar) {}
2930         };
2931     }
2932 
2933     template <typename T, std::size_t NDIM>
scale_inplace(const T q,bool fence)2934     void FunctionImpl<T,NDIM>::scale_inplace(const T q, bool fence) {
2935         //        unary_op_coeff_inplace(detail::scaleinplace<T,NDIM>(q), fence);
2936         unary_op_node_inplace(detail::scaleinplace<T,NDIM>(q), fence);
2937     }
2938 
2939     template <typename T, std::size_t NDIM>
square_inplace(bool fence)2940     void FunctionImpl<T,NDIM>::square_inplace(bool fence) {
2941         //unary_op_value_inplace(&implT::autorefine_square_test, detail::squareinplace<T,NDIM>(), fence);
2942         unary_op_value_inplace(detail::squareinplace<T,NDIM>(), fence);
2943     }
2944 
2945     template <typename T, std::size_t NDIM>
abs_inplace(bool fence)2946     void FunctionImpl<T,NDIM>::abs_inplace(bool fence) {
2947         unary_op_value_inplace(detail::absinplace<T,NDIM>(), fence);
2948     }
2949 
2950     template <typename T, std::size_t NDIM>
abs_square_inplace(bool fence)2951     void FunctionImpl<T,NDIM>::abs_square_inplace(bool fence) {
2952         unary_op_value_inplace(detail::abssquareinplace<T,NDIM>(), fence);
2953     }
2954 
2955     template <typename T, std::size_t NDIM>
phi_for_mul(Level np,Translation lp,Level nc,Translation lc,Tensor<double> & phi)2956     void FunctionImpl<T,NDIM>::phi_for_mul(Level np, Translation lp, Level nc, Translation lc, Tensor<double>& phi) const {
2957         //PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
2958         double p[200];
2959         double scale = pow(2.0,double(np-nc));
2960         for (int mu=0; mu<cdata.npt; ++mu) {
2961             double xmu = scale*(cdata.quad_x(mu)+lc) - lp;
2962             MADNESS_ASSERT(xmu>-1e-15 && xmu<(1+1e-15));
2963             legendre_scaling_functions(xmu,cdata.k,p);
2964             for (int i=0; i<k; ++i) phi(i,mu) = p[i];
2965         }
2966         phi.scale(pow(2.0,0.5*np));
2967     }
2968 
2969     template <typename T, std::size_t NDIM>
2970 
parent_to_child(const coeffT & s,const keyT & parent,const keyT & child)2971     const GenTensor<T> FunctionImpl<T,NDIM>::parent_to_child(const coeffT& s, const keyT& parent, const keyT& child) const {
2972         //PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
2973         // An invalid parent/child means that they are out of the box
2974         // and it is the responsibility of the caller to worry about that
2975         // ... most likely the coefficients (s) are zero to reflect
2976         // zero B.C. so returning s makes handling this easy.
2977         if (parent == child || parent.is_invalid() || child.is_invalid()) return s;
2978 
2979         coeffT result = fcube_for_mul<T>(child, parent, s);
2980         result.scale(sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*child.level()))));
2981         result = transform(result,cdata.quad_phiw);
2982 
2983         return result;
2984     }
2985 
2986 
2987     template <typename T, std::size_t NDIM>
trace_local()2988     T FunctionImpl<T,NDIM>::trace_local() const {
2989         PROFILE_MEMBER_FUNC(FunctionImpl);
2990         std::vector<long> v0(NDIM,0);
2991         T sum = 0.0;
2992         if (compressed) {
2993             if (world.rank() == coeffs.owner(cdata.key0)) {
2994                 typename dcT::const_iterator it = coeffs.find(cdata.key0).get();
2995                 if (it != coeffs.end()) {
2996                     const nodeT& node = it->second;
2997                     if (node.has_coeff()) sum = node.coeff().full_tensor_copy()(v0);
2998                 }
2999             }
3000         }
3001         else {
3002             for (typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
3003                 const keyT& key = it->first;
3004                 const nodeT& node = it->second;
3005                 if (node.has_coeff()) sum += node.coeff().full_tensor_copy()(v0)*pow(0.5,NDIM*key.level()*0.5);
3006             }
3007         }
3008         return sum*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
3009     }
3010 
3011 
enforce_bc(bool is_periodic,Level n,Translation & l)3012     static inline bool enforce_bc(bool is_periodic, Level n, Translation& l) {
3013         Translation two2n = 1ul << n;
3014         if (l < 0) {
3015             if (is_periodic)
3016                 l += two2n; // Periodic BC
3017             else
3018                 return false; // Zero BC
3019         }
3020         else if (l >= two2n) {
3021             if (is_periodic)
3022                 l -= two2n; // Periodic BC
3023             else
3024                 return false; // Zero BC
3025         }
3026         return true;
3027     }
3028 
3029     template <typename T, std::size_t NDIM>
neighbor(const keyT & key,const Key<NDIM> & disp,const std::vector<bool> & is_periodic)3030     Key<NDIM> FunctionImpl<T,NDIM>::neighbor(const keyT& key, const Key<NDIM>& disp, const std::vector<bool>& is_periodic) const {
3031         Vector<Translation,NDIM> l = key.translation();
3032 
3033         for (std::size_t axis=0; axis<NDIM; ++axis) {
3034             l[axis] += disp.translation()[axis];
3035 
3036             //if (!enforce_bc(bc(axis,0), bc(axis,1), key.level(), l[axis])) {
3037             if (!enforce_bc(is_periodic[axis], key.level(), l[axis])) {
3038                 return keyT::invalid();
3039             }
3040         }
3041         return keyT(key.level(),l);
3042     }
3043 
3044 
3045     template <typename T, std::size_t NDIM>
3046     Future< std::pair< Key<NDIM>, GenTensor<T> > >
find_me(const Key<NDIM> & key)3047     FunctionImpl<T,NDIM>::find_me(const Key<NDIM>& key) const {
3048         //PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling
3049         typedef std::pair< Key<NDIM>,coeffT > argT;
3050         Future<argT> result;
3051         //PROFILE_BLOCK(find_me_send); // Too fine grain for routine profiling
3052         woT::task(coeffs.owner(key), &implT::sock_it_to_me_too, key, result.remote_ref(world), TaskAttributes::hipri());
3053         return result;
3054     }
3055 
3056 
3057     template <typename T, std::size_t NDIM>
compress_spawn(const Key<NDIM> & key,bool nonstandard,bool keepleaves,bool redundant)3058     Future< GenTensor<T> > FunctionImpl<T,NDIM>::compress_spawn(const Key<NDIM>& key,
3059                                                                 bool nonstandard, bool keepleaves, bool redundant) {
3060         if (!coeffs.probe(key)) print("missing node",key);
3061         MADNESS_ASSERT(coeffs.probe(key));
3062 
3063         // get fetches remote data (here actually local)
3064         nodeT& node = coeffs.find(key).get()->second;
3065         if (node.has_children()) {
3066             std::vector< Future<coeffT > > v = future_vector_factory<coeffT >(1<<NDIM);
3067             int i=0;
3068             for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
3069                 //PROFILE_BLOCK(compress_send); // Too fine grain for routine profiling
3070                 // readily available
3071                 v[i] = woT::task(coeffs.owner(kit.key()), &implT::compress_spawn, kit.key(),
3072                                  nonstandard, keepleaves, redundant, TaskAttributes::hipri());
3073             }
3074             if (redundant) return woT::task(world.rank(),&implT::make_redundant_op, key, v);
3075             return woT::task(world.rank(),&implT::compress_op, key, v, nonstandard, redundant);
3076         }
3077         else {
3078             Future<coeffT > result(node.coeff());
3079             if (!keepleaves) node.clear_coeff();
3080             return result;
3081         }
3082     }
3083 
3084     template <typename T, std::size_t NDIM>
plot_cube_kernel(archive::archive_ptr<Tensor<T>> ptr,const keyT & key,const coordT & plotlo,const coordT & plothi,const std::vector<long> & npt,bool eval_refine)3085     void FunctionImpl<T,NDIM>::plot_cube_kernel(archive::archive_ptr< Tensor<T> > ptr,
3086                                                 const keyT& key,
3087                                                 const coordT& plotlo, const coordT& plothi, const std::vector<long>& npt,
3088                                                 bool eval_refine) const {
3089 
3090         Tensor<T>& r = *ptr;
3091 
3092         coordT h; // Increment between points in each dimension
3093         for (std::size_t i=0; i<NDIM; ++i) {
3094             if (npt[i] > 1) {
3095                 h[i] = (plothi[i]-plotlo[i])/(npt[i]-1);
3096             }
3097             else {
3098                 MADNESS_ASSERT(plotlo[i] == plothi[i]);
3099                 h[i] = 0.0;
3100             }
3101         }
3102 
3103         const Level n = key.level();
3104         const Vector<Translation,NDIM>& l = key.translation();
3105         const double twon = pow(2.0,double(n));
3106         const tensorT& coeff = coeffs.find(key).get()->second.coeff().full_tensor_copy(); // Ugh!
3107         //        const tensorT coeff = coeffs.find(key).get()->second.full_tensor_copy(); // Ugh!
3108         long ind[NDIM];
3109         coordT x;
3110 
3111         coordT boxlo, boxhi;
3112         Vector<int,NDIM> boxnpt;
3113         double fac = pow(0.5,double(key.level()));
3114         int npttotal = 1;
3115         for (std::size_t d=0; d<NDIM; ++d) {
3116             // Coords of box
3117             boxlo[d] = fac*key.translation()[d];
3118             boxhi[d] = boxlo[d]+fac;
3119 
3120             if (boxlo[d] > plothi[d] || boxhi[d] < plotlo[d]) {
3121                 // Discard boxes out of the plot range
3122                 npttotal = boxnpt[d] = 0;
3123                 //print("OO range?");
3124                 break;
3125             }
3126             else if (npt[d] == 1) {
3127                 // This dimension is only a single point
3128                 boxlo[d] = boxhi[d] = plotlo[d];
3129                 boxnpt[d] = 1;
3130             }
3131             else {
3132                 // Restrict to plot range
3133                 boxlo[d] = std::max(boxlo[d],plotlo[d]);
3134                 boxhi[d] = std::min(boxhi[d],plothi[d]);
3135 
3136                 // Round lo up to next plot point; round hi down
3137                 double xlo = long((boxlo[d]-plotlo[d])/h[d])*h[d] + plotlo[d];
3138                 if (xlo < boxlo[d]) xlo += h[d];
3139                 boxlo[d] =  xlo;
3140                 double xhi = long((boxhi[d]-plotlo[d])/h[d])*h[d] + plotlo[d];
3141                 if (xhi > boxhi[d]) xhi -= h[d];
3142                 // MADNESS_ASSERT(xhi >= xlo);  // nope
3143                 boxhi[d] = xhi;
3144                 boxnpt[d] = long(round((boxhi[d] - boxlo[d])/h[d])) + 1;
3145             }
3146             npttotal *= boxnpt[d];
3147         }
3148         //print("    box", boxlo, boxhi, boxnpt, npttotal);
3149         if (npttotal > 0) {
3150             for (IndexIterator it(boxnpt); it; ++it) {
3151                 for (std::size_t d=0; d<NDIM; ++d) {
3152                     double xd = boxlo[d] + it[d]*h[d]; // Sim. coords of point
3153                     x[d] = twon*xd - l[d]; // Offset within box
3154                     MADNESS_ASSERT(x[d]>=0.0 && x[d] <=1.0);  // sanity
3155                     if (npt[d] > 1) {
3156                         ind[d] = long(round((xd-plotlo[d])/h[d])); // Index of plot point
3157                     }
3158                     else {
3159                         ind[d] = 0;
3160                     }
3161                     MADNESS_ASSERT(ind[d]>=0 && ind[d]<npt[d]); // sanity
3162                 }
3163                 if (eval_refine) {
3164                     r(ind) = n;
3165                 }
3166                 else {
3167                     T tmp = eval_cube(n, x, coeff);
3168                     r(ind) = tmp;
3169                     //print("    eval", ind, tmp, r(ind));
3170                 }
3171             }
3172         }
3173     }
3174 
3175     /// Set plot_refine=true to get a plot of the refinment levels of
3176     /// the given function (defaulted to false in prototype).
3177     template <typename T, std::size_t NDIM>
eval_plot_cube(const coordT & plotlo,const coordT & plothi,const std::vector<long> & npt,const bool eval_refine)3178     Tensor<T> FunctionImpl<T,NDIM>::eval_plot_cube(const coordT& plotlo,
3179                                                    const coordT& plothi,
3180                                                    const std::vector<long>& npt,
3181                                                    const bool eval_refine) const {
3182         PROFILE_MEMBER_FUNC(FunctionImpl);
3183         Tensor<T> r(NDIM, &npt[0]);
3184         //r(___) = 99.0;
3185         MADNESS_ASSERT(!compressed);
3186 
3187         for (typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
3188             const keyT& key = it->first;
3189             const nodeT& node = it->second;
3190             if (node.has_coeff()) {
3191                 woT::task(world.rank(), &implT::plot_cube_kernel,
3192                           archive::archive_ptr< Tensor<T> >(&r), key, plotlo, plothi, npt, eval_refine);
3193             }
3194         }
3195 
3196         //        ITERATOR(r, if (r(IND) == 99.0) {print("BAD", IND); error("bad",0);});
3197 
3198         world.taskq.fence();
3199         world.gop.sum(r.ptr(), r.size());
3200         world.gop.fence();
3201 
3202         return r;
3203     }
3204 
dxprintvalue(FILE * f,const double t)3205     static inline void dxprintvalue(FILE* f, const double t) {
3206         fprintf(f,"%.6e\n",t);
3207     }
3208 
dxprintvalue(FILE * f,const double_complex & t)3209     static inline void dxprintvalue(FILE* f, const double_complex& t) {
3210         fprintf(f,"%.6e %.6e\n", t.real(), t.imag());
3211     }
3212 
3213     template <typename T, std::size_t NDIM>
plotdx(const Function<T,NDIM> & function,const char * filename,const Tensor<double> & cell,const std::vector<long> & npt,bool binary)3214     void plotdx(const Function<T,NDIM>& function,
3215                 const char* filename,
3216                 const Tensor<double>& cell,
3217                 const std::vector<long>& npt,
3218                 bool binary) {
3219         PROFILE_FUNC;
3220         MADNESS_ASSERT(NDIM<=6);
3221         const char* element[6] = {"lines","quads","cubes","cubes4D","cubes5D","cubes6D"};
3222 
3223         function.verify();
3224         World& world = const_cast< Function<T,NDIM>& >(function).world();
3225         FILE *f=0;
3226         if (world.rank() == 0) {
3227             f = fopen(filename, "w");
3228             if (!f) MADNESS_EXCEPTION("plotdx: failed to open the plot file", 0);
3229 
3230             fprintf(f,"object 1 class gridpositions counts ");
3231             for (std::size_t d=0; d<NDIM; ++d) fprintf(f," %ld",npt[d]);
3232             fprintf(f,"\n");
3233 
3234             fprintf(f,"origin ");
3235             for (std::size_t d=0; d<NDIM; ++d) fprintf(f, " %.6e", cell(d,0));
3236             fprintf(f,"\n");
3237 
3238             for (std::size_t d=0; d<NDIM; ++d) {
3239                 fprintf(f,"delta ");
3240                 for (std::size_t c=0; c<d; ++c) fprintf(f, " 0");
3241                 double h = 0.0;
3242                 if (npt[d]>1) h = (cell(d,1)-cell(d,0))/(npt[d]-1);
3243                 fprintf(f," %.6e", h);
3244                 for (std::size_t c=d+1; c<NDIM; ++c) fprintf(f, " 0");
3245                 fprintf(f,"\n");
3246             }
3247             fprintf(f,"\n");
3248 
3249             fprintf(f,"object 2 class gridconnections counts ");
3250             for (std::size_t d=0; d<NDIM; ++d) fprintf(f," %ld",npt[d]);
3251             fprintf(f,"\n");
3252             fprintf(f, "attribute \"element type\" string \"%s\"\n", element[NDIM-1]);
3253             fprintf(f, "attribute \"ref\" string \"positions\"\n");
3254             fprintf(f,"\n");
3255 
3256             int npoint = 1;
3257             for (std::size_t d=0; d<NDIM; ++d) npoint *= npt[d];
3258             const char* iscomplex = "";
3259             if (TensorTypeData<T>::iscomplex) iscomplex = "category complex";
3260             const char* isbinary = "";
3261             if (binary) isbinary = "binary";
3262             fprintf(f,"object 3 class array type double %s rank 0 items %d %s data follows\n",
3263                     iscomplex, npoint, isbinary);
3264         }
3265 
3266         world.gop.fence();
3267         Tensor<T> r = function.eval_cube(cell, npt);
3268 
3269         if (world.rank() == 0) {
3270             if (binary) {
3271                 // This assumes that the values are double precision
3272                 fflush(f);
3273                 fwrite((void *) r.ptr(), sizeof(T), r.size(), f);
3274                 fflush(f);
3275             }
3276             else {
3277                 for (IndexIterator it(npt); it; ++it) {
3278                     //fprintf(f,"%.6e\n",r(*it));
3279                     dxprintvalue(f,r(*it));
3280                 }
3281             }
3282             fprintf(f,"\n");
3283 
3284             fprintf(f,"object \"%s\" class field\n",filename);
3285             fprintf(f,"component \"positions\" value 1\n");
3286             fprintf(f,"component \"connections\" value 2\n");
3287             fprintf(f,"component \"data\" value 3\n");
3288             fprintf(f,"\nend\n");
3289             fclose(f);
3290         }
3291         world.gop.fence();
3292     }
3293 
3294     template <std::size_t NDIM>
set_defaults(World & world)3295     void FunctionDefaults<NDIM>::set_defaults(World& world) {
3296         k = 6;
3297         thresh = 1e-4;
3298         initial_level = 2;
3299         special_level = 3;
3300         max_refine_level = 30;
3301         truncate_mode = 0;
3302         refine = true;
3303         autorefine = true;
3304         debug = false;
3305         truncate_on_project = true;
3306         apply_randomize = false;
3307         project_randomize = false;
3308         bc = BoundaryConditions<NDIM>(BC_FREE);
3309         tt = TT_FULL;
3310         cell = Tensor<double>(NDIM,2);
3311         cell(_,1) = 1.0;
3312         recompute_cell_info();
3313 
3314         //pmap = std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >(new WorldDCDefaultPmap< Key<NDIM> >(world));
3315         pmap = std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >(new madness::LevelPmap< Key<NDIM> >(world));
3316         //pmap = std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >(new SimplePmap< Key<NDIM> >(world));
3317     }
3318     template <std::size_t NDIM>
print()3319     void FunctionDefaults<NDIM>::print(){
3320     		std::cout << "Function Defaults:" << std::endl;
3321     		std::cout << "                      Dimension " <<  ": " <<  NDIM << std::endl;
3322     		std::cout << "                               k" <<  ": " << k << std::endl;
3323     		std::cout << "                          thresh" <<  ": " << thresh << std::endl;
3324     		std::cout << "                   initial_level" <<  ": " << initial_level << std::endl;
3325     		std::cout << "                   special_level" <<  ": " << special_level << std::endl;
3326     		std::cout << "                max_refine_level" <<  ": " << max_refine_level << std::endl;
3327     		std::cout << "                   truncate_mode" <<  ": " << truncate_mode << std::endl;
3328     		std::cout << "                          refine" <<  ": " << refine << std::endl;
3329     		std::cout << "                      autorefine" <<  ": " << autorefine << std::endl;
3330     		std::cout << "                           debug" <<  ": " << debug << std::endl;
3331     		std::cout << "             truncate_on_project" <<  ": " << truncate_on_project << std::endl;
3332     		std::cout << "                 apply_randomize" <<  ": " << apply_randomize << std::endl;
3333     		std::cout << "               project_randomize" <<  ": " << project_randomize << std::endl;
3334     		std::cout << "                              bc" <<  ": " << bc << std::endl;
3335     		std::cout << "                              tt" <<  ": " << tt << std::endl;
3336     		std::cout << "                            cell" <<  ": " << cell << std::endl;
3337     }
3338 
3339     template <typename T, std::size_t NDIM>
3340     const FunctionCommonData<T,NDIM>* FunctionCommonData<T,NDIM>::data[MAXK] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
3341 
3342     template <std::size_t NDIM> int FunctionDefaults<NDIM>::k;
3343     template <std::size_t NDIM> double FunctionDefaults<NDIM>::thresh;
3344     template <std::size_t NDIM> int FunctionDefaults<NDIM>::initial_level;
3345     template <std::size_t NDIM> int FunctionDefaults<NDIM>::special_level;
3346     template <std::size_t NDIM> int FunctionDefaults<NDIM>::max_refine_level;
3347     template <std::size_t NDIM> int FunctionDefaults<NDIM>::truncate_mode;
3348     template <std::size_t NDIM> bool FunctionDefaults<NDIM>::refine;
3349     template <std::size_t NDIM> bool FunctionDefaults<NDIM>::autorefine;
3350     template <std::size_t NDIM> bool FunctionDefaults<NDIM>::debug;
3351     template <std::size_t NDIM> bool FunctionDefaults<NDIM>::truncate_on_project;
3352     template <std::size_t NDIM> bool FunctionDefaults<NDIM>::apply_randomize;
3353     template <std::size_t NDIM> bool FunctionDefaults<NDIM>::project_randomize;
3354     template <std::size_t NDIM> BoundaryConditions<NDIM> FunctionDefaults<NDIM>::bc;
3355     template <std::size_t NDIM> TensorType FunctionDefaults<NDIM>::tt;
3356     template <std::size_t NDIM> Tensor<double> FunctionDefaults<NDIM>::cell;
3357     template <std::size_t NDIM> Tensor<double> FunctionDefaults<NDIM>::cell_width;
3358     template <std::size_t NDIM> Tensor<double> FunctionDefaults<NDIM>::rcell_width;
3359     template <std::size_t NDIM> double FunctionDefaults<NDIM>::cell_volume;
3360     template <std::size_t NDIM> double FunctionDefaults<NDIM>::cell_min_width;
3361     template <std::size_t NDIM> std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > > FunctionDefaults<NDIM>::pmap;
3362 
3363     template <std::size_t NDIM> std::vector< Key<NDIM> > Displacements<NDIM>::disp;
3364     template <std::size_t NDIM> std::vector< Key<NDIM> > Displacements<NDIM>::disp_periodicsum[64];
3365 
3366 }
3367 
3368 #endif // MADNESS_MRA_MRAIMPL_H__INCLUDED
3369