1 /* 2 This file is part of MADNESS. 3 4 Copyright (C) 2007,2010 Oak Ridge National Laboratory 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License as published by 8 the Free Software Foundation; either version 2 of the License, or 9 (at your option) any later version. 10 11 This program is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 GNU General Public License for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with this program; if not, write to the Free Software 18 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 19 20 For more information please contact: 21 22 Robert J. Harrison 23 Oak Ridge National Laboratory 24 One Bethel Valley Road 25 P.O. Box 2008, MS-6367 26 27 email: harrisonrj@ornl.gov 28 tel: 865-241-3937 29 fax: 865-572-0680 30 */ 31 32 #ifndef MADNESS_MRA_FUNCIMPL_H__INCLUDED 33 #define MADNESS_MRA_FUNCIMPL_H__INCLUDED 34 35 /// \file funcimpl.h 36 /// \brief Provides FunctionCommonData, FunctionImpl and FunctionFactory 37 38 #include <iostream> 39 #include <type_traits> 40 #include <madness/world/MADworld.h> 41 #include <madness/world/print.h> 42 #include <madness/misc/misc.h> 43 #include <madness/tensor/tensor.h> 44 #include <madness/tensor/gentensor.h> 45 46 #include <madness/mra/function_common_data.h> 47 #include <madness/mra/indexit.h> 48 #include <madness/mra/key.h> 49 #include <madness/mra/funcdefaults.h> 50 #include <madness/mra/function_factory.h> 51 52 #include "leafop.h" 53 54 namespace madness { 55 template <typename T, std::size_t NDIM> 56 class DerivativeBase; 57 58 template<typename T, std::size_t NDIM> 59 class FunctionImpl; 60 61 template<typename T, std::size_t NDIM> 62 class FunctionNode; 63 64 template<typename T, std::size_t NDIM> 65 class Function; 66 
67 template<typename T, std::size_t NDIM> 68 class FunctionFactory; 69 70 template<typename T, std::size_t NDIM, std::size_t MDIM> 71 class CompositeFunctorInterface; 72 73 template<int D> 74 class LoadBalImpl; 75 76 } 77 78 namespace madness { 79 80 81 /// A simple process map 82 template<typename keyT> 83 class SimplePmap : public WorldDCPmapInterface<keyT> { 84 private: 85 const int nproc; 86 const ProcessID me; 87 88 public: SimplePmap(World & world)89 SimplePmap(World& world) : nproc(world.nproc()), me(world.rank()) 90 { } 91 owner(const keyT & key)92 ProcessID owner(const keyT& key) const { 93 if (key.level() == 0) 94 return 0; 95 else 96 return key.hash() % nproc; 97 } 98 }; 99 100 /// A pmap that locates children on odd levels with their even level parents 101 template <typename keyT> 102 class LevelPmap : public WorldDCPmapInterface<keyT> { 103 private: 104 const int nproc; 105 public: LevelPmap()106 LevelPmap() : nproc(0) {}; 107 LevelPmap(World & world)108 LevelPmap(World& world) : nproc(world.nproc()) {} 109 110 /// Find the owner of a given key owner(const keyT & key)111 ProcessID owner(const keyT& key) const { 112 Level n = key.level(); 113 if (n == 0) return 0; 114 hashT hash; 115 if (n <= 3 || (n&0x1)) hash = key.hash(); 116 else hash = key.parent().hash(); 117 return hash%nproc; 118 } 119 }; 120 121 122 /// FunctionNode holds the coefficients, etc., at each node of the 2^NDIM-tree 123 template<typename T, std::size_t NDIM> 124 class FunctionNode { 125 public: 126 typedef GenTensor<T> coeffT; 127 typedef Tensor<T> tensorT; 128 private: 129 // Should compile OK with these volatile but there should 130 // be no need to set as volatile since the container internally 131 // stores the entire entry as volatile 132 133 coeffT _coeffs; ///< The coefficients, if any 134 double _norm_tree; ///< After norm_tree will contain norm of coefficients summed up tree 135 bool _has_children; ///< True if there are children 136 coeffT buffer; ///< The coefficients, if 
any 137 138 public: 139 typedef WorldContainer<Key<NDIM> , FunctionNode<T, NDIM> > dcT; ///< Type of container holding the nodes 140 /// Default constructor makes node without coeff or children FunctionNode()141 FunctionNode() : 142 _coeffs(), _norm_tree(1e300), _has_children(false) { 143 } 144 145 /// Constructor from given coefficients with optional children 146 147 /// Note that only a shallow copy of the coeff are taken so 148 /// you should pass in a deep copy if you want the node to 149 /// take ownership. 150 explicit 151 FunctionNode(const coeffT& coeff, bool has_children = false) : _coeffs(coeff)152 _coeffs(coeff), _norm_tree(1e300), _has_children(has_children) { 153 } 154 155 explicit FunctionNode(const coeffT & coeff,double norm_tree,bool has_children)156 FunctionNode(const coeffT& coeff, double norm_tree, bool has_children) : 157 _coeffs(coeff), _norm_tree(norm_tree), _has_children(has_children) { 158 } 159 FunctionNode(const FunctionNode<T,NDIM> & other)160 FunctionNode(const FunctionNode<T, NDIM>& other) { 161 *this = other; 162 } 163 164 FunctionNode<T, NDIM>& 165 operator=(const FunctionNode<T, NDIM>& other) { 166 if (this != &other) { 167 coeff() = copy(other.coeff()); 168 _norm_tree = other._norm_tree; 169 _has_children = other._has_children; 170 } 171 return *this; 172 } 173 174 /// Copy with possible type conversion of coefficients, copying all other state 175 176 /// Choose to not overload copy and type conversion operators 177 /// so there are no automatic type conversions. 
178 template<typename Q> 179 FunctionNode<Q, NDIM> convert()180 convert() const { 181 return FunctionNode<Q, NDIM> (madness::convert<Q,T>(coeff()), _has_children); 182 } 183 184 /// Returns true if there are coefficients in this node 185 bool has_coeff()186 has_coeff() const { 187 return _coeffs.has_data(); 188 } 189 190 191 /// Returns true if this node has children 192 bool has_children()193 has_children() const { 194 return _has_children; 195 } 196 197 /// Returns true if this does not have children 198 bool is_leaf()199 is_leaf() const { 200 return !_has_children; 201 } 202 203 /// Returns true if this node is invalid (no coeffs and no children) 204 bool is_invalid()205 is_invalid() const { 206 return !(has_coeff() || has_children()); 207 } 208 209 /// Returns a non-const reference to the tensor containing the coeffs 210 211 /// Returns an empty tensor if there are no coefficients. 212 coeffT& coeff()213 coeff() { 214 MADNESS_ASSERT(_coeffs.ndim() == -1 || (_coeffs.dim(0) <= 2 215 * MAXK && _coeffs.dim(0) >= 0)); 216 return const_cast<coeffT&>(_coeffs); 217 } 218 219 /// Returns a const reference to the tensor containing the coeffs 220 221 /// Returns an empty tensor if there are no coefficeints. 222 const coeffT& coeff()223 coeff() const { 224 return const_cast<const coeffT&>(_coeffs); 225 } 226 227 /// Returns the number of coefficients in this node size()228 size_t size() const { 229 return _coeffs.size(); 230 } 231 232 public: 233 234 /// reduces the rank of the coefficients (if applicable) reduceRank(const double & eps)235 void reduceRank(const double& eps) { 236 _coeffs.reduce_rank(eps); 237 } 238 239 /// Sets \c has_children attribute to value of \c flag. 
set_has_children(bool flag)240 void set_has_children(bool flag) { 241 _has_children = flag; 242 } 243 244 /// Sets \c has_children attribute to true recurring up to ensure connected set_has_children_recursive(const typename FunctionNode<T,NDIM>::dcT & c,const Key<NDIM> & key)245 void set_has_children_recursive(const typename FunctionNode<T,NDIM>::dcT& c,const Key<NDIM>& key) { 246 //madness::print(" set_chi_recu: ", key, *this); 247 //PROFILE_MEMBER_FUNC(FunctionNode); // Too fine grain for routine profiling 248 if (!(has_children() || has_coeff() || key.level()==0)) { 249 // If node already knows it has children or it has 250 // coefficients then it must already be connected to 251 // its parent. If not, the node was probably just 252 // created for this operation and must be connected to 253 // its parent. 254 Key<NDIM> parent = key.parent(); 255 // Task on next line used to be TaskAttributes::hipri()) ... but deferring execution of this 256 // makes sense since it is not urgent and lazy connection will likely mean that less forwarding 257 // will happen since the upper level task will have already made the connection. 
258 const_cast<dcT&>(c).task(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent); 259 //const_cast<dcT&>(c).send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent); 260 //madness::print(" set_chi_recu: forwarding",key,parent); 261 } 262 _has_children = true; 263 } 264 265 /// Sets \c has_children attribute to value of \c !flag set_is_leaf(bool flag)266 void set_is_leaf(bool flag) { 267 _has_children = !flag; 268 } 269 270 /// Takes a \em shallow copy of the coeff --- same as \c this->coeff()=coeff set_coeff(const coeffT & coeffs)271 void set_coeff(const coeffT& coeffs) { 272 coeff() = coeffs; 273 if ((_coeffs.has_data()) and ((_coeffs.dim(0) < 0) || (_coeffs.dim(0)>2*MAXK))) { 274 print("set_coeff: may have a problem"); 275 print("set_coeff: coeff.dim[0] =", coeffs.dim(0), ", 2* MAXK =", 2*MAXK); 276 } 277 MADNESS_ASSERT(coeffs.dim(0)<=2*MAXK && coeffs.dim(0)>=0); 278 } 279 280 /// Clears the coefficients (has_coeff() will subsequently return false) clear_coeff()281 void clear_coeff() { 282 coeff()=coeffT(); 283 } 284 285 /// Scale the coefficients of this node 286 template <typename Q> scale(Q a)287 void scale(Q a) { 288 _coeffs.scale(a); 289 } 290 291 /// Sets the value of norm_tree set_norm_tree(double norm_tree)292 void set_norm_tree(double norm_tree) { 293 _norm_tree = norm_tree; 294 } 295 296 /// Gets the value of norm_tree get_norm_tree()297 double get_norm_tree() const { 298 return _norm_tree; 299 } 300 301 302 /// General bi-linear operation --- this = this*alpha + other*beta 303 304 /// This/other may not have coefficients. Has_children will be 305 /// true in the result if either this/other have children. 
306 template <typename Q, typename R> gaxpy_inplace(const T & alpha,const FunctionNode<Q,NDIM> & other,const R & beta)307 void gaxpy_inplace(const T& alpha, const FunctionNode<Q,NDIM>& other, const R& beta) { 308 //PROFILE_MEMBER_FUNC(FuncNode); // Too fine grain for routine profiling 309 if (other.has_children()) 310 _has_children = true; 311 if (has_coeff()) { 312 if (other.has_coeff()) { 313 coeff().gaxpy(alpha,other.coeff(),beta); 314 } 315 else { 316 coeff().scale(alpha); 317 } 318 } 319 else if (other.has_coeff()) { 320 coeff() = other.coeff()*beta; //? Is this the correct type conversion? 321 } 322 } 323 324 /// Accumulate inplace and if necessary connect node to parent accumulate2(const tensorT & t,const typename FunctionNode<T,NDIM>::dcT & c,const Key<NDIM> & key)325 double accumulate2(const tensorT& t, const typename FunctionNode<T,NDIM>::dcT& c, 326 const Key<NDIM>& key) { 327 double cpu0=cpu_time(); 328 if (has_coeff()) { 329 MADNESS_ASSERT(coeff().tensor_type()==TT_FULL); 330 // if (coeff().type==TT_FULL) { 331 coeff() += coeffT(t,-1.0,TT_FULL); 332 // } else { 333 // tensorT cc=coeff().full_tensor_copy();; 334 // cc += t; 335 // coeff()=coeffT(cc,args); 336 // } 337 } 338 else { 339 // No coeff and no children means the node is newly 340 // created for this operation and therefore we must 341 // tell its parent that it exists. 
342 coeff() = coeffT(t,-1.0,TT_FULL); 343 // coeff() = copy(t); 344 // coeff() = coeffT(t,args); 345 if ((!_has_children) && key.level()> 0) { 346 Key<NDIM> parent = key.parent(); 347 if (c.is_local(parent)) 348 const_cast<dcT&>(c).send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent); 349 else 350 const_cast<dcT&>(c).task(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent); 351 } 352 } 353 double cpu1=cpu_time(); 354 return cpu1-cpu0; 355 } 356 357 358 /// Accumulate inplace and if necessary connect node to parent accumulate(const coeffT & t,const typename FunctionNode<T,NDIM>::dcT & c,const Key<NDIM> & key,const TensorArgs & args)359 double accumulate(const coeffT& t, const typename FunctionNode<T,NDIM>::dcT& c, 360 const Key<NDIM>& key, const TensorArgs& args) { 361 double cpu0=cpu_time(); 362 if (has_coeff()) { 363 364 #if 1 365 coeff().add_SVD(t,args.thresh); 366 if (buffer.rank()<coeff().rank()) { 367 if (buffer.has_data()) { 368 buffer.add_SVD(coeff(),args.thresh); 369 } else { 370 buffer=copy(coeff()); 371 } 372 coeff()=coeffT(); 373 } 374 375 #else 376 // always do low rank 377 coeff().add_SVD(t,args.thresh); 378 379 #endif 380 381 } else { 382 // No coeff and no children means the node is newly 383 // created for this operation and therefore we must 384 // tell its parent that it exists. 
385 coeff() = copy(t); 386 if ((!_has_children) && key.level()> 0) { 387 Key<NDIM> parent = key.parent(); 388 if (c.is_local(parent)) 389 const_cast<dcT&>(c).send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent); 390 else 391 const_cast<dcT&>(c).task(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, c, parent); 392 } 393 } 394 double cpu1=cpu_time(); 395 return cpu1-cpu0; 396 } 397 consolidate_buffer(const TensorArgs & args)398 void consolidate_buffer(const TensorArgs& args) { 399 if ((coeff().has_data()) and (buffer.has_data())) { 400 coeff().add_SVD(buffer,args.thresh); 401 } else if (buffer.has_data()) { 402 coeff()=buffer; 403 } 404 buffer=coeffT(); 405 } 406 trace_conj(const FunctionNode<T,NDIM> & rhs)407 T trace_conj(const FunctionNode<T,NDIM>& rhs) const { 408 return this->_coeffs.trace_conj((rhs._coeffs)); 409 } 410 411 template <typename Archive> serialize(Archive & ar)412 void serialize(Archive& ar) { 413 ar & coeff() & _has_children & _norm_tree; 414 } 415 416 }; 417 418 template <typename T, std::size_t NDIM> 419 std::ostream& operator<<(std::ostream& s, const FunctionNode<T,NDIM>& node) { 420 s << "(has_coeff=" << node.has_coeff() << ", has_children=" << node.has_children() << ", norm="; 421 double norm = node.has_coeff() ? 
node.coeff().normf() : 0.0; 422 if (norm < 1e-12) 423 norm = 0.0; 424 double nt = node.get_norm_tree(); 425 if (nt == 1e300) nt = 0.0; 426 s << norm << ", norm_tree=" << nt << "), rank="<< node.coeff().rank()<<")"; 427 return s; 428 } 429 430 431 /// returns true if the result of a hartree_product is a leaf node (compute norm & error) 432 template<typename T, size_t NDIM> 433 struct hartree_leaf_op { 434 435 typedef FunctionImpl<T,NDIM> implT; 436 const FunctionImpl<T,NDIM>* f; 437 long k; do_error_leaf_ophartree_leaf_op438 bool do_error_leaf_op() const {return false;} 439 hartree_leaf_ophartree_leaf_op440 hartree_leaf_op() {} hartree_leaf_ophartree_leaf_op441 hartree_leaf_op(const implT* f, const long& k) : f(f), k(k) {} 442 443 /// no pre-determination operatorhartree_leaf_op444 bool operator()(const Key<NDIM>& key) const {return false;} 445 446 /// no post-determination operatorhartree_leaf_op447 bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const { 448 MADNESS_EXCEPTION("no post-determination in hartree_leaf_op",1); 449 return true; 450 } 451 452 /// post-determination: true if f is a leaf and the result is well-represented 453 454 /// @param[in] key the hi-dimensional key (breaks into keys for f and g) 455 /// @param[in] fcoeff coefficients of f of its appropriate key in NS form 456 /// @param[in] gcoeff coefficients of g of its appropriate key in NS form operatorhartree_leaf_op457 bool operator()(const Key<NDIM>& key, const Tensor<T>& fcoeff, const Tensor<T>& gcoeff) const { 458 459 if (key.level()<2) return false; 460 Slice s = Slice(0,k-1); 461 std::vector<Slice> s0(NDIM/2,s); 462 463 const double tol=f->get_thresh(); 464 const double thresh=f->truncate_tol(tol, key); 465 // include the wavelets in the norm, makes it much more accurate 466 const double fnorm=fcoeff.normf(); 467 const double gnorm=gcoeff.normf(); 468 469 // if the final norm is small, perform the hartree product and return 470 const double norm=fnorm*gnorm; // computing 
the outer product 471 if (norm < thresh) return true; 472 473 // norm of the scaling function coefficients 474 const double sfnorm=fcoeff(s0).normf(); 475 const double sgnorm=gcoeff(s0).normf(); 476 477 // get the error of both functions and of the pair function; 478 // need the abs for numerics: sfnorm might be equal fnorm. 479 const double ferror=sqrt(std::abs(fnorm*fnorm-sfnorm*sfnorm)); 480 const double gerror=sqrt(std::abs(gnorm*gnorm-sgnorm*sgnorm)); 481 482 // if the expected error is small, perform the hartree product and return 483 const double error=fnorm*gerror + ferror*gnorm + ferror*gerror; 484 // const double error=sqrt(fnorm*fnorm*gnorm*gnorm - sfnorm*sfnorm*sgnorm*sgnorm); 485 486 if (error < thresh) return true; 487 return false; 488 } serializehartree_leaf_op489 template <typename Archive> void serialize (Archive& ar) { 490 ar & f & k; 491 } 492 }; 493 494 /// returns true if the result of the convolution operator op with some provided 495 /// coefficients will be small 496 template<typename T, size_t NDIM, typename opT> 497 struct op_leaf_op { 498 typedef FunctionImpl<T,NDIM> implT; 499 500 const opT* op; ///< the convolution operator 501 const implT* f; ///< the source or result function, needed for truncate_tol do_error_leaf_opop_leaf_op502 bool do_error_leaf_op() const {return true;} 503 op_leaf_opop_leaf_op504 op_leaf_op() {} op_leaf_opop_leaf_op505 op_leaf_op(const opT* op, const implT* f) : op(op), f(f) {} 506 507 /// pre-determination: we can't know if this will be a leaf node before we got the final coeffs operatorop_leaf_op508 bool operator()(const Key<NDIM>& key) const {return true;} 509 510 /// post-determination: return true if operator and coefficient norms are small operatorop_leaf_op511 bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const { 512 if (key.level()<2) return false; 513 const double cnorm=coeff.normf(); 514 return this->operator()(key,cnorm); 515 } 516 517 /// post-determination: return true if operator 
and coefficient norms are small operatorop_leaf_op518 bool operator()(const Key<NDIM>& key, const double& cnorm) const { 519 if (key.level()<2) return false; 520 521 typedef Key<opT::opdim> opkeyT; 522 const opkeyT source=op->get_source_key(key); 523 524 const double thresh=f->truncate_tol(f->get_thresh(),key); 525 const std::vector<opkeyT>& disp = op->get_disp(key.level()); 526 const opkeyT& d = *disp.begin(); // use the zero-displacement for screening 527 const double opnorm = op->norm(key.level(), d, source); 528 const double norm=opnorm*cnorm; 529 return norm<thresh; 530 531 } 532 serializeop_leaf_op533 template <typename Archive> void serialize (Archive& ar) { 534 ar & op & f; 535 } 536 537 }; 538 539 540 /// returns true if the result of a hartree_product is a leaf node 541 /// criteria are error, norm and its effect on a convolution operator 542 template<typename T, size_t NDIM, size_t LDIM, typename opT> 543 struct hartree_convolute_leaf_op { 544 545 typedef FunctionImpl<T,NDIM> implT; 546 typedef FunctionImpl<T,LDIM> implL; 547 548 const FunctionImpl<T,NDIM>* f; 549 const implL* g; // for use of its cdata only 550 const opT* op; do_error_leaf_ophartree_convolute_leaf_op551 bool do_error_leaf_op() const {return false;} 552 hartree_convolute_leaf_ophartree_convolute_leaf_op553 hartree_convolute_leaf_op() {} hartree_convolute_leaf_ophartree_convolute_leaf_op554 hartree_convolute_leaf_op(const implT* f, const implL* g, const opT* op) 555 : f(f), g(g), op(op) {} 556 557 /// no pre-determination operatorhartree_convolute_leaf_op558 bool operator()(const Key<NDIM>& key) const {return true;} 559 560 /// no post-determination operatorhartree_convolute_leaf_op561 bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const { 562 MADNESS_EXCEPTION("no post-determination in hartree_convolute_leaf_op",1); 563 return true; 564 } 565 566 /// post-determination: true if f is a leaf and the result is well-represented 567 568 /// @param[in] key the hi-dimensional 
key (breaks into keys for f and g) 569 /// @param[in] fcoeff coefficients of f of its appropriate key in NS form 570 /// @param[in] gcoeff coefficients of g of its appropriate key in NS form operatorhartree_convolute_leaf_op571 bool operator()(const Key<NDIM>& key, const Tensor<T>& fcoeff, const Tensor<T>& gcoeff) const { 572 // bool operator()(const Key<NDIM>& key, const GenTensor<T>& coeff) const { 573 574 if (key.level()<2) return false; 575 576 const double tol=f->get_thresh(); 577 const double thresh=f->truncate_tol(tol, key); 578 // include the wavelets in the norm, makes it much more accurate 579 const double fnorm=fcoeff.normf(); 580 const double gnorm=gcoeff.normf(); 581 582 // norm of the scaling function coefficients 583 const double sfnorm=fcoeff(g->get_cdata().s0).normf(); 584 const double sgnorm=gcoeff(g->get_cdata().s0).normf(); 585 586 // if the final norm is small, perform the hartree product and return 587 const double norm=fnorm*gnorm; // computing the outer product 588 if (norm < thresh) return true; 589 590 // get the error of both functions and of the pair function 591 const double ferror=sqrt(fnorm*fnorm-sfnorm*sfnorm); 592 const double gerror=sqrt(gnorm*gnorm-sgnorm*sgnorm); 593 594 // if the expected error is small, perform the hartree product and return 595 const double error=fnorm*gerror + ferror*gnorm + ferror*gerror; 596 if (error < thresh) return true; 597 598 // now check if the norm of this and the norm of the operator are significant 599 const std::vector<Key<NDIM> >& disp = op->get_disp(key.level()); 600 const Key<NDIM>& d = *disp.begin(); // use the zero-displacement for screening 601 const double opnorm = op->norm(key.level(), d, key); 602 const double final_norm=opnorm*sfnorm*sgnorm; 603 if (final_norm < thresh) return true; 604 605 return false; 606 } serializehartree_convolute_leaf_op607 template <typename Archive> void serialize (Archive& ar) { 608 ar & f & op; 609 } 610 }; 611 612 template<typename T, size_t NDIM> 613 struct 
noop { operatornoop614 void operator()(const Key<NDIM>& key, const GenTensor<T>& coeff, const bool& is_leaf) const {} operatornoop615 bool operator()(const Key<NDIM>& key, const GenTensor<T>& fcoeff, const GenTensor<T>& gcoeff) const { 616 MADNESS_EXCEPTION("in noop::operator()",1); 617 return true; 618 } serializenoop619 template <typename Archive> void serialize (Archive& ar) {} 620 621 }; 622 623 template<typename T, std::size_t NDIM> 624 struct insert_op { 625 typedef FunctionImpl<T,NDIM> implT; 626 typedef Key<NDIM> keyT; 627 typedef GenTensor<T> coeffT; 628 typedef FunctionNode<T,NDIM> nodeT; 629 630 implT* impl; insert_opinsert_op631 insert_op() : impl() {} insert_opinsert_op632 insert_op(implT* f) : impl(f) {} insert_opinsert_op633 insert_op(const insert_op& other) : impl(other.impl) {} operatorinsert_op634 void operator()(const keyT& key, const coeffT& coeff, const bool& is_leaf) const { 635 impl->get_coeffs().replace(key,nodeT(coeff,not is_leaf)); 636 } serializeinsert_op637 template <typename Archive> void serialize (Archive& ar) { 638 ar & impl; 639 } 640 641 }; 642 643 template<size_t NDIM> 644 struct true_op { 645 646 template<typename T> operatortrue_op647 bool operator()(const Key<NDIM>& key, const T& t) const {return true;} 648 649 template<typename T, typename R> operatortrue_op650 bool operator()(const Key<NDIM>& key, const T& t, const R& r) const {return true;} serializetrue_op651 template <typename Archive> void serialize (Archive& ar) {} 652 653 }; 654 655 /// shallow-copy, pared-down version of FunctionNode, for special purpose only 656 template<typename T, std::size_t NDIM> 657 struct ShallowNode { 658 typedef GenTensor<T> coeffT; 659 coeffT _coeffs; 660 bool _has_children; ShallowNodeShallowNode661 ShallowNode() : _coeffs(), _has_children(false) {} ShallowNodeShallowNode662 ShallowNode(const FunctionNode<T,NDIM>& node) 663 : _coeffs(node.coeff()), _has_children(node.has_children()) {} ShallowNodeShallowNode664 ShallowNode(const 
ShallowNode<T,NDIM>& node) 665 : _coeffs(node.coeff()), _has_children(node._has_children) {} 666 coeffShallowNode667 const coeffT& coeff() const {return _coeffs;} coeffShallowNode668 coeffT& coeff() {return _coeffs;} has_childrenShallowNode669 bool has_children() const {return _has_children;} is_leafShallowNode670 bool is_leaf() const {return not _has_children;} 671 template <typename Archive> serializeShallowNode672 void serialize(Archive& ar) { 673 ar & coeff() & _has_children; 674 } 675 }; 676 677 678 /// a class to track where relevant (parent) coeffs are 679 680 /// E.g. if a 6D function is composed of two 3D functions their coefficients must be tracked. 681 /// We might need coeffs from a box that does not exist, and to avoid searching for 682 /// parents we track which are their required respective boxes. 683 /// - CoeffTracker will refer either to a requested key, if it exists, or to its 684 /// outermost parent. 685 /// - Children must be made in sequential order to be able to track correctly. 686 /// 687 /// Usage: 1. make the child of a given CoeffTracker. 688 /// If the parent CoeffTracker refers to a leaf node (flag is_leaf) 689 /// the child will refer to the same node. Otherwise it will refer 690 /// to the child node. 691 /// 2. retrieve its coefficients (possible communication/ returns a Future). 692 /// Member variable key always refers to an existing node, 693 /// so we can fetch it. Once we have the node we can determine 694 /// if it has children which allows us to make a child (see 1. 
) 695 template<typename T, size_t NDIM> 696 class CoeffTracker { 697 698 typedef FunctionImpl<T,NDIM> implT; 699 typedef Key<NDIM> keyT; 700 typedef GenTensor<T> coeffT; 701 typedef std::pair<Key<NDIM>,ShallowNode<T,NDIM> > datumT; 702 enum LeafStatus {no, yes, unknown}; 703 704 /// the funcimpl that has the coeffs 705 const implT* impl; 706 /// the current key, which must exists in impl 707 keyT key_; 708 /// flag if key is a leaf node 709 LeafStatus is_leaf_; 710 /// the coefficients belonging to key 711 coeffT coeff_; 712 public: 713 714 /// default ctor CoeffTracker()715 CoeffTracker() : impl(), key_(), is_leaf_(unknown), coeff_() {} 716 717 /// the initial ctor making the root key CoeffTracker(const implT * impl)718 CoeffTracker(const implT* impl) : impl(impl), is_leaf_(no) { 719 if (impl) key_=impl->get_cdata().key0; 720 } 721 722 /// ctor with a pair<keyT,nodeT> CoeffTracker(const CoeffTracker & other,const datumT & datum)723 explicit CoeffTracker(const CoeffTracker& other, const datumT& datum) : impl(other.impl), key_(other.key_), 724 coeff_(datum.second.coeff()) { 725 if (datum.second.is_leaf()) is_leaf_=yes; 726 else is_leaf_=no; 727 } 728 729 /// copy ctor CoeffTracker(const CoeffTracker & other)730 CoeffTracker(const CoeffTracker& other) : impl(other.impl), key_(other.key_), 731 is_leaf_(other.is_leaf_), coeff_(other.coeff_) {} 732 733 /// const reference to impl get_impl()734 const implT* get_impl() const {return impl;} 735 736 /// const reference to the coeffs coeff()737 const coeffT& coeff() const {return coeff_;} 738 739 /// const reference to the key key()740 const keyT& key() const {return key_;} 741 742 /// return the coefficients belonging to the passed-in key 743 744 /// if key equals tracked key just return the coeffs, otherwise 745 /// make the child coefficients. 
746 /// @param[in] key return coeffs corresponding to this key 747 /// @return coefficients belonging to key coeff(const keyT & key)748 coeffT coeff(const keyT& key) const { 749 MADNESS_ASSERT(impl); 750 if (impl->is_compressed() or impl->is_nonstandard()) 751 return impl->parent_to_child_NS(key,key_,coeff_); 752 return impl->parent_to_child(coeff_,key_,key); 753 } 754 755 /// const reference to is_leaf flag is_leaf()756 const LeafStatus& is_leaf() const {return is_leaf_;} 757 758 /// make a child of this, ignoring the coeffs make_child(const keyT & child)759 CoeffTracker make_child(const keyT& child) const { 760 761 // fast return 762 if ((not impl) or impl->is_on_demand()) return CoeffTracker(*this); 763 764 // can't make a child without knowing if this is a leaf -- activate first 765 MADNESS_ASSERT((is_leaf_==yes) or (is_leaf_==no)); 766 767 CoeffTracker result; 768 if (impl) { 769 result.impl=impl; 770 if (is_leaf_==yes) result.key_=key_; 771 if (is_leaf_==no) { 772 result.key_=child; 773 // check if child is direct descendent of this, but root node is special case 774 if (child.level()>0) MADNESS_ASSERT(result.key().level()==key().level()+1); 775 } 776 result.is_leaf_=unknown; 777 } 778 return result; 779 } 780 781 /// find the coefficients 782 783 /// this involves communication to a remote node 784 /// @return a Future<CoeffTracker> with the coefficients that key refers to activate()785 Future<CoeffTracker> activate() const { 786 787 // fast return 788 if (not impl) return Future<CoeffTracker>(CoeffTracker()); 789 if (impl->is_on_demand()) return Future<CoeffTracker>(CoeffTracker(impl)); 790 791 // this will return a <keyT,nodeT> from a remote node 792 ProcessID p=impl->get_coeffs().owner(key()); 793 Future<datumT> datum1=impl->task(p, &implT::find_datum,key_,TaskAttributes::hipri()); 794 795 // construct a new CoeffTracker locally 796 return impl->world.taskq.add(*const_cast<CoeffTracker*> (this), 797 &CoeffTracker::forward_ctor,*this,datum1); 798 } 799 800 
private: 801 /// taskq-compatible forwarding to the ctor forward_ctor(const CoeffTracker & other,const datumT & datum)802 CoeffTracker forward_ctor(const CoeffTracker& other, const datumT& datum) const { 803 return CoeffTracker(other,datum); 804 } 805 806 public: 807 /// serialization serialize(const Archive & ar)808 template <typename Archive> void serialize(const Archive& ar) { 809 int il=int(is_leaf_); 810 ar & impl & key_ & il & coeff_; 811 is_leaf_=LeafStatus(il); 812 } 813 }; 814 815 template<typename T, std::size_t NDIM> 816 std::ostream& 817 operator<<(std::ostream& s, const CoeffTracker<T,NDIM>& ct) { 818 s << ct.key() << ct.is_leaf() << " " << ct.get_impl(); 819 return s; 820 } 821 822 /// FunctionImpl holds all Function state to facilitate shallow copy semantics 823 824 /// Since Function assignment and copy constructors are shallow it 825 /// greatly simplifies maintaining consistent state to have all 826 /// (permanent) state encapsulated in a single class. The state 827 /// is shared between instances using a shared_ptr<FunctionImpl>. 828 /// 829 /// The FunctionImpl inherits all of the functionality of WorldContainer 830 /// (to store the coefficients) and WorldObject<WorldContainer> (used 831 /// for RMI and for its unqiue id). 832 /// 833 /// The class methods are public to avoid painful multiple friend template 834 /// declarations for Function and FunctionImpl ... but this trust should not be 835 /// abused ... NOTHING except FunctionImpl methods should mess with FunctionImplData. 836 /// The LB stuff might have to be an exception. 
    template <typename T, std::size_t NDIM>
    class FunctionImpl : public WorldObject< FunctionImpl<T,NDIM> > {
    private:
        typedef WorldObject< FunctionImpl<T,NDIM> > woT; ///< Base class world object type

    public:
        typedef FunctionImpl<T,NDIM> implT; ///< Type of this class (implementation)
        typedef std::shared_ptr< FunctionImpl<T,NDIM> > pimplT; ///< pointer to this class
        typedef Tensor<T> tensorT; ///< Type of tensor for anything but to hold coeffs
        typedef Vector<Translation,NDIM> tranT; ///< Type of array holding translation
        typedef Key<NDIM> keyT; ///< Type of key
        typedef FunctionNode<T,NDIM> nodeT; ///< Type of node
        typedef GenTensor<T> coeffT; ///< Type of tensor used to hold coeffs
        typedef WorldContainer<keyT,nodeT> dcT; ///< Type of container holding the coefficients
        typedef std::pair<const keyT,nodeT> datumT; ///< Type of entry in container
        typedef Vector<double,NDIM> coordT; ///< Type of vector holding coordinates

        //template <typename Q, int D> friend class Function;
        template <typename Q, std::size_t D> friend class FunctionImpl;

        World& world;

        /// getters
        int get_initial_level() const {return initial_level;}
        int get_special_level() const {return special_level;}
        const std::vector<Vector<double,NDIM> >& get_special_points() const {return special_points;}

    private:
        int k; ///< Wavelet order
        double thresh; ///< Screening threshold
        int initial_level; ///< Initial level for refinement
        int special_level; ///< Minimum level for refinement on special points
        std::vector<Vector<double,NDIM> > special_points; ///< special points for further refinement (needed for composite functions or multiplication)
        int max_refine_level; ///< Do not refine below this level
        int truncate_mode; ///< 0=default=(|d|<thresh), 1=(|d|<thresh/2^n), 2=(|d|<thresh/4^n);
        bool autorefine; ///< If true, autorefine where appropriate
        bool truncate_on_project; ///< If true projection inserts at level n-1 not n
        bool nonstandard; ///< If true, compress keeps scaling coeff
        TensorArgs targs; ///< type of tensor to be used in the FunctionNodes

        const FunctionCommonData<T,NDIM>& cdata; ///< shared precomputed data for wavelet order k

        std::shared_ptr< FunctionFunctorInterface<T,NDIM> > functor; ///< analytic expression to project (may be null)

        bool on_demand; ///< does this function have an additional functor?
        bool compressed; ///< Compression status
        bool redundant; ///< If true, function keeps sum coefficients on all levels

        dcT coeffs; ///< The coefficients

        // Disable the default copy constructor (declared but never defined)
        FunctionImpl(const FunctionImpl<T,NDIM>& p);

    public:
        Timer timer_accumulate;
        Timer timer_lr_result;
        Timer timer_filter;
        Timer timer_compress_svd;
        Timer timer_target_driven;
        bool do_new;
        AtomicInt small;
        AtomicInt large;

        /// Initialize function impl from data in factory
        FunctionImpl(const FunctionFactory<T,NDIM>& factory)
            : WorldObject<implT>(factory._world)
            , world(factory._world)
            , k(factory._k)
            , thresh(factory._thresh)
            , initial_level(factory._initial_level)
            , special_level(factory._special_level)
            , special_points(factory._special_points)
            , max_refine_level(factory._max_refine_level)
            , truncate_mode(factory._truncate_mode)
            , autorefine(factory._autorefine)
            , truncate_on_project(factory._truncate_on_project)
            , nonstandard(false)
            , targs(factory._thresh,FunctionDefaults<NDIM>::get_tensor_type())
            , cdata(FunctionCommonData<T,NDIM>::get(k))
            , functor(factory.get_functor())
            , on_demand(factory._is_on_demand)
            , compressed(factory._compressed)
            , redundant(false)
            , coeffs(world,factory._pmap,false)
            //, bc(factory._bc)
        {
            // PROFILE_MEMBER_FUNC(FunctionImpl); // No need to profile this
            // !!! Ensure that all local state is correctly formed
            // before invoking process_pending for the coeffs and
            // for this.  Otherwise, there is a race condition.
            MADNESS_ASSERT(k>0 && k<=MAXK);

            bool empty = (factory._empty or is_on_demand());
            bool do_refine = factory._refine;

            // if we are going to refine anyway, start one level coarser
            if (do_refine)
                initial_level = std::max(0,initial_level - 1);

            if (empty) { // Do not set any coefficients at all
                // additional functors are only evaluated on-demand
            } else if (functor) { // Project function and optionally refine
                insert_zero_down_to_initial_level(cdata.key0);
                // spawn a projection task on the owner of every local leaf box
                typename dcT::const_iterator end = coeffs.end();
                for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
                    if (it->second.is_leaf())
                        woT::task(coeffs.owner(it->first), &implT::project_refine_op, it->first, do_refine,
                                  functor->special_points());
                }
            }
            else { // Set as if a zero function
                initial_level = 1;
                insert_zero_down_to_initial_level(keyT(0));
            }

            // deliver any messages that arrived while construction was incomplete
            coeffs.process_pending();
            this->process_pending();
            if (factory._fence && (functor || !empty)) world.gop.fence();
        }

        /// Copy constructor

        /// Allocates a \em new function in preparation for a deep copy
        ///
        /// By default takes pmap from other but can also specify a different pmap.
        /// Does \em not copy the coefficients ... creates an empty container.
        template <typename Q>
        FunctionImpl(const FunctionImpl<Q,NDIM>& other,
                     const std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >& pmap,
                     bool dozero)
            : WorldObject<implT>(other.world)
            , world(other.world)
            , k(other.k)
            , thresh(other.thresh)
            , initial_level(other.initial_level)
            , special_level(other.special_level)
            , special_points(other.special_points)
            , max_refine_level(other.max_refine_level)
            , truncate_mode(other.truncate_mode)
            , autorefine(other.autorefine)
            , truncate_on_project(other.truncate_on_project)
            , nonstandard(other.nonstandard)
            , targs(other.targs)
            , cdata(FunctionCommonData<T,NDIM>::get(k))
            , functor()
            , on_demand(false) // since functor() is a default ctor
            , compressed(other.compressed)
            , redundant(other.redundant)
            , coeffs(world, pmap ? pmap : other.coeffs.get_pmap())
            //, bc(other.bc)
        {
            if (dozero) {
                initial_level = 1;
                insert_zero_down_to_initial_level(cdata.key0);
                //world.gop.fence(); <<<<<<<<<<<<<<<<<<<<<< needs a fence argument
            }
            coeffs.process_pending();
            this->process_pending();
        }

        virtual ~FunctionImpl() { }

        const std::shared_ptr< WorldDCPmapInterface< Key<NDIM> > >& get_pmap() const;

        /// Copy coeffs from other into self

        /// Every local node of other is converted to this function's value
        /// type and inserted into this function's container.
        /// @param[in] other the function impl to copy from
        /// @param[in] fence if true, globally synchronize before returning
        template <typename Q>
        void copy_coeffs(const FunctionImpl<Q,NDIM>& other, bool fence) {
            typename FunctionImpl<Q,NDIM>::dcT::const_iterator end = other.coeffs.end();
            for (typename FunctionImpl<Q,NDIM>::dcT::const_iterator it=other.coeffs.begin();
                 it!=end; ++it) {
                const keyT& key = it->first;
                const typename FunctionImpl<Q,NDIM>::nodeT& node = it->second;
                coeffs.replace(key,node. template convert<T>());
            }
            if (fence)
                world.gop.fence();
        }

        /// perform inplace gaxpy: this = alpha*this + beta*other

        /// @param[in] alpha prefactor for this
        /// @param[in] beta prefactor for other
        /// @param[in] g the other function, reconstructed
        template<typename Q, typename R>
        void gaxpy_inplace_reconstructed(const T& alpha, const FunctionImpl<Q,NDIM>& g, const R& beta, const bool fence) {
            // merge g's tree into this' tree
            this->merge_trees(beta,g,alpha,true);

            // sum down the sum coeffs into the leafs
            if (world.rank() == coeffs.owner(cdata.key0)) sum_down_spawn(cdata.key0, coeffT());
            if (fence) world.gop.fence();
        }

        /// merge the trees of this and other, while multiplying them with the alpha or beta, resp

        /// first step in an inplace gaxpy operation for reconstructed functions; assuming the same
        /// distribution for this and other

        /// on output, *this = alpha* *this + beta * other
        /// @param[in] alpha prefactor for this
        /// @param[in] beta prefactor for other
        /// @param[in] other the other function, reconstructed
        template<typename Q, typename R>
        void merge_trees(const T alpha, const FunctionImpl<Q,NDIM>& other, const R beta, const bool fence=true) {
            MADNESS_ASSERT(get_pmap() == other.get_pmap());
            other.flo_unary_op_node_inplace(do_merge_trees<Q,R>(alpha,beta,*this),fence);
            if (fence) world.gop.fence();
        }


        /// perform: this= alpha*f + beta*g, invoked by result

        /// f and g are reconstructed, so we can save on the compress operation,
        /// walk down the joint tree, and add leaf coefficients; effectively refines
1049 1050 /// nothing returned, but leaves this's tree reconstructed and as sum of f and g 1051 /// @param[in] alpha prefactor for f 1052 /// @param[in] f first addend 1053 /// @param[in] beta prefactor for g 1054 /// @param[in] g second addend 1055 void gaxpy_oop_reconstructed(const double alpha, const implT& f, 1056 const double beta, const implT& g, const bool fence); 1057 1058 /// functor for the gaxpy_inplace method 1059 template <typename Q, typename R> 1060 struct do_gaxpy_inplace { 1061 typedef Range<typename FunctionImpl<Q,NDIM>::dcT::const_iterator> rangeT; 1062 FunctionImpl<T,NDIM>* f; ///< prefactor for current function impl 1063 T alpha; ///< the current function impl 1064 R beta; ///< prefactor for other function impl do_gaxpy_inplacedo_gaxpy_inplace1065 do_gaxpy_inplace() {}; do_gaxpy_inplacedo_gaxpy_inplace1066 do_gaxpy_inplace(FunctionImpl<T,NDIM>* f, T alpha, R beta) : f(f), alpha(alpha), beta(beta) {} operatordo_gaxpy_inplace1067 bool operator()(typename rangeT::iterator& it) const { 1068 const keyT& key = it->first; 1069 const FunctionNode<Q,NDIM>& other_node = it->second; 1070 // Use send to get write accessor and automated construction if missing 1071 f->coeffs.send(key, &nodeT:: template gaxpy_inplace<Q,R>, alpha, other_node, beta); 1072 return true; 1073 } 1074 template <typename Archive> serializedo_gaxpy_inplace1075 void serialize(Archive& ar) {} 1076 }; 1077 1078 /// Inplace general bilinear operation 1079 /// @param[in] alpha prefactor for the current function impl 1080 /// @param[in] other the other function impl 1081 /// @param[in] beta prefactor for other 1082 template <typename Q, typename R> gaxpy_inplace(const T & alpha,const FunctionImpl<Q,NDIM> & other,const R & beta,bool fence)1083 void gaxpy_inplace(const T& alpha,const FunctionImpl<Q,NDIM>& other, const R& beta, bool fence) { 1084 MADNESS_ASSERT(get_pmap() == other.get_pmap()); 1085 if (alpha != T(1.0)) scale_inplace(alpha,false); 1086 typedef Range<typename 
FunctionImpl<Q,NDIM>::dcT::const_iterator> rangeT; 1087 typedef do_gaxpy_inplace<Q,R> opT; 1088 world.taskq.for_each<rangeT,opT>(rangeT(other.coeffs.begin(), other.coeffs.end()), opT(this, T(1.0), beta)); 1089 if (fence) 1090 world.gop.fence(); 1091 } 1092 1093 // loads a function impl from persistence 1094 // @param[in] ar the archive where the function impl is stored 1095 template <typename Archive> load(Archive & ar)1096 void load(Archive& ar) { 1097 // WE RELY ON K BEING STORED FIRST 1098 int kk = 0; 1099 ar & kk; 1100 1101 MADNESS_ASSERT(kk==k); 1102 1103 // note that functor should not be (re)stored 1104 ar & thresh & initial_level & max_refine_level & truncate_mode 1105 & autorefine & truncate_on_project & nonstandard & compressed ; //& bc; 1106 1107 ar & coeffs; 1108 world.gop.fence(); 1109 } 1110 1111 // saves a function impl to persistence 1112 // @param[in] ar the archive where the function impl is to be stored 1113 template <typename Archive> store(Archive & ar)1114 void store(Archive& ar) { 1115 // WE RELY ON K BEING STORED FIRST 1116 1117 // note that functor should not be (re)stored 1118 ar & k & thresh & initial_level & max_refine_level & truncate_mode 1119 & autorefine & truncate_on_project & nonstandard & compressed ; //& bc; 1120 1121 ar & coeffs; 1122 world.gop.fence(); 1123 } 1124 1125 /// Returns true if the function is compressed. 1126 bool is_compressed() const; 1127 1128 /// Returns true if the function is redundant. 1129 bool is_redundant() const; 1130 1131 bool is_nonstandard() const; 1132 1133 void set_functor(const std::shared_ptr<FunctionFunctorInterface<T,NDIM> > functor1); 1134 1135 std::shared_ptr<FunctionFunctorInterface<T,NDIM> > get_functor(); 1136 1137 std::shared_ptr<FunctionFunctorInterface<T,NDIM> > get_functor() const; 1138 1139 void unset_functor(); 1140 1141 bool& is_on_demand(); // ???????????????????? why returning reference 1142 1143 const bool& is_on_demand() const; // ????????????????????? 
1144 1145 TensorType get_tensor_type() const; 1146 1147 TensorArgs get_tensor_args() const; 1148 1149 double get_thresh() const; 1150 1151 void set_thresh(double value); 1152 1153 bool get_autorefine() const; 1154 1155 void set_autorefine(bool value); 1156 1157 int get_k() const; 1158 1159 const dcT& get_coeffs() const; 1160 1161 dcT& get_coeffs(); 1162 1163 const FunctionCommonData<T,NDIM>& get_cdata() const; 1164 1165 void accumulate_timer(const double time) const; // !!!!!!!!!!!! REDUNDANT !!!!!!!!!!!!!!! 1166 1167 void print_timer() const; 1168 1169 void reset_timer(); 1170 1171 /// Adds a constant to the function. Local operation, optional fence 1172 1173 /// In scaling function basis must add value to first polyn in 1174 /// each box with appropriate scaling for level. In wavelet basis 1175 /// need only add at level zero. 1176 /// @param[in] t the scalar to be added 1177 void add_scalar_inplace(T t, bool fence); 1178 1179 /// Initialize nodes to zero function at initial_level of refinement. 1180 1181 /// Works for either basis. No communication. 1182 void insert_zero_down_to_initial_level(const keyT& key); 1183 1184 /// Truncate according to the threshold with optional global fence 1185 1186 /// If thresh<=0 the default value of this->thresh is used 1187 /// @param[in] tol the truncation tolerance 1188 void truncate(double tol, bool fence); 1189 1190 /// Returns true if after truncation this node has coefficients 1191 1192 /// Assumed to be invoked on process owning key. Possible non-blocking 1193 /// communication. 
1194 /// @param[in] key the key of the current function node 1195 Future<bool> truncate_spawn(const keyT& key, double tol); 1196 1197 /// Actually do the truncate operation 1198 /// @param[in] key the key to the current function node being evaluated for truncation 1199 /// @param[in] tol the tolerance for thresholding 1200 /// @param[in] v vector of Future<bool>'s that specify whether the current nodes children have coeffs 1201 bool truncate_op(const keyT& key, double tol, const std::vector< Future<bool> >& v); 1202 1203 /// Evaluate function at quadrature points in the specified box 1204 1205 /// @param[in] key the key indicating where the quadrature points are located 1206 /// @param[in] f the interface to the elementary function 1207 /// @param[in] qx quadrature points on a level=0 box 1208 /// @param[out] fval values 1209 void fcube(const keyT& key, const FunctionFunctorInterface<T,NDIM>& f, const Tensor<double>& qx, tensorT& fval) const; 1210 1211 /// Evaluate function at quadrature points in the specified box 1212 1213 /// @param[in] key the key indicating where the quadrature points are located 1214 /// @param[in] f the interface to the elementary function 1215 /// @param[in] qx quadrature points on a level=0 box 1216 /// @param[out] fval values 1217 void fcube(const keyT& key, T (*f)(const coordT&), const Tensor<double>& qx, tensorT& fval) const; 1218 1219 /// Returns cdata.key0 1220 const keyT& key0() const; 1221 1222 /// Prints the coeffs tree of the current function impl 1223 /// @param[in] maxlevel the maximum level of the tree for printing 1224 /// @param[out] os the ostream to where the output is sent 1225 void print_tree(std::ostream& os = std::cout, Level maxlevel = 10000) const; 1226 1227 /// Functor for the do_print_tree method 1228 void do_print_tree(const keyT& key, std::ostream& os, Level maxlevel) const; 1229 1230 /// Prints the coeffs tree of the current function impl (using GraphViz) 1231 /// @param[in] maxlevel the maximum level of the tree 
for printing 1232 /// @param[out] os the ostream to where the output is sent 1233 void print_tree_graphviz(std::ostream& os = std::cout, Level maxlevel = 10000) const; 1234 1235 /// Functor for the do_print_tree method (using GraphViz) 1236 void do_print_tree_graphviz(const keyT& key, std::ostream& os, Level maxlevel) const; 1237 1238 /// convert a number [0,limit] to a hue color code [blue,red], 1239 /// or, if log is set, a number [1.e-10,limit] 1240 struct do_convert_to_color { 1241 double limit; 1242 bool log; lowerdo_convert_to_color1243 static double lower() {return 1.e-10;}; do_convert_to_colordo_convert_to_color1244 do_convert_to_color() {}; do_convert_to_colordo_convert_to_color1245 do_convert_to_color(const double limit, const bool log) : limit(limit), log(log) {} operatordo_convert_to_color1246 double operator()(double val) const { 1247 double color=0.0; 1248 1249 if (log) { 1250 double val2=log10(val) - log10(lower()); // will yield >0.0 1251 double upper=log10(limit) -log10(lower()); 1252 val2=0.7-(0.7/upper)*val2; 1253 color= std::max(0.0,val2); 1254 color= std::min(0.7,color); 1255 } else { 1256 double hue=0.7-(0.7/limit)*(val); 1257 color= std::max(0.0,hue); 1258 } 1259 return color; 1260 } 1261 }; 1262 1263 1264 /// Print a plane ("xy", "xz", or "yz") containing the point x to file 1265 1266 /// works for all dimensions; we walk through the tree, and if a leaf node 1267 /// inside the sub-cell touches the plane we print it in pstricks format 1268 void print_plane(const std::string filename, const int xaxis, const int yaxis, const coordT& el2); 1269 1270 /// collect the data for a plot of the MRA structure locally on each node 1271 1272 /// @param[in] xaxis the x-axis in the plot (can be any axis of the MRA box) 1273 /// @param[in] yaxis the y-axis in the plot (can be any axis of the MRA box) 1274 /// @param[in] el2 needs a description 1275 /// \todo Provide a description for el2 1276 Tensor<double> print_plane_local(const int xaxis, const int 
        /// Functor for the print_plane method
        /// @param[in] filename the filename for the output
        /// @param[in] plotinfo plotting parameters
        /// @param[in] xaxis the x-axis in the plot (can be any axis of the MRA box)
        /// @param[in] yaxis the y-axis in the plot (can be any axis of the MRA box)
        void do_print_plane(const std::string filename, std::vector<Tensor<double> > plotinfo,
                            const int xaxis, const int yaxis, const coordT el2);

        /// print the grid (the roots of the quadrature of each leaf box)
        /// of this function in user xyz coordinates
        /// @param[in] filename the filename for the output
        void print_grid(const std::string filename) const;

        /// return the keys of the local leaf boxes
        std::vector<keyT> local_leaf_keys() const;

        /// print the grid in xyz format

        /// the quadrature points and the key information will be written to file,
        /// @param[in] filename where the quadrature points will be written to
        /// @param[in] keys all leaf keys
        void do_print_grid(const std::string filename, const std::vector<keyT>& keys) const;

        /// read data from a grid

        /// @param[in] keyfile file with keys and grid points for each key
        /// @param[in] gridfile file with grid points, w/o key, but with same ordering
        /// @param[in] vnuc_functor subtract the values of this functor if regularization is needed
        template<size_t FDIM>
        typename std::enable_if<NDIM==FDIM>::type
        read_grid(const std::string keyfile, const std::string gridfile,
                  std::shared_ptr< FunctionFunctorInterface<double,NDIM> > vnuc_functor) {

            std::ifstream kfile(keyfile.c_str());
            std::ifstream gfile(gridfile.c_str());
            std::string line;

            // both files start with the total number of data points; they must agree
            long ndata,ndata1;
            if (not (std::getline(kfile,line))) MADNESS_EXCEPTION("failed reading 1st line of key data",0);
            if (not (std::istringstream(line) >> ndata)) MADNESS_EXCEPTION("failed reading k",0);
            if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 1st line of grid data",0);
            if (not (std::istringstream(line) >> ndata1)) MADNESS_EXCEPTION("failed reading k",0);
            MADNESS_ASSERT(ndata==ndata1);
            // skip over the second (header/comment) line of each file
            if (not (std::getline(kfile,line))) MADNESS_EXCEPTION("failed reading 2nd line of key data",0);
            if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 2nd line of grid data",0);

            // the quadrature points in simulation coordinates of the root node
            const Tensor<double> qx=cdata.quad_x;
            const size_t npt = qx.dim(0);

            // the number of coordinates (grid point tuples) per box ({x1},{x2},{x3},..,{xNDIM})
            long npoints=power<NDIM>(npt);
            // the number of boxes
            long nboxes=ndata/npoints;
            MADNESS_ASSERT(nboxes*npoints==ndata);
            print("reading ",nboxes,"boxes from file",gridfile,keyfile);

            // these will be the data
            Tensor<T> values(cdata.vk,false);

            int ii=0;  // counts the boxes actually read
            std::string gline,kline;
            // while (1) {
            while (std::getline(kfile,kline)) {

                double x,y,z,x1,y1,z1,val;

                // get the key
                long nn;
                Translation l1,l2,l3;
                // line looks like: # key: n l1 l2 l3
                kline.erase(0,7);
                std::stringstream(kline) >> nn >> l1 >> l2 >> l3;
                // kfile >> s >> nn >> l1 >> l2 >> l3;
                const Vector<Translation,3> ll{ l1,l2,l3 };
                Key<3> key(nn,ll);

                // this is borrowed from fcube: reconstruct the user-space
                // coordinates of the quadrature points of this box
                const Vector<Translation,3>& l = key.translation();
                const Level n = key.level();
                const double h = std::pow(0.5,double(n));
                coordT c; // will hold the point in user coordinates
                const Tensor<double>& cell_width = FunctionDefaults<NDIM>::get_cell_width();
                const Tensor<double>& cell = FunctionDefaults<NDIM>::get_cell();


                if (NDIM == 3) {
                    for (int i=0; i<npt; ++i) {
                        c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
                        for (int j=0; j<npt; ++j) {
                            c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
                            for (int k=0; k<npt; ++k) {
                                c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
                                // fprintf(pFile,"%18.12f %18.12f %18.12f\n",c[0],c[1],c[2]);
                                auto& success1 = std::getline(gfile,gline); MADNESS_ASSERT(success1);
                                auto& success2 = std::getline(kfile,kline); MADNESS_ASSERT(success2);
                                std::istringstream(gline) >> x >> y >> z >> val;
                                std::istringstream(kline) >> x1 >> y1 >> z1;
                                // sanity check: the coordinates read from file must
                                // match the recomputed quadrature points
                                MADNESS_ASSERT(std::fabs(x-c[0])<1.e-4);
                                MADNESS_ASSERT(std::fabs(x1-c[0])<1.e-4);
                                MADNESS_ASSERT(std::fabs(y-c[1])<1.e-4);
                                MADNESS_ASSERT(std::fabs(y1-c[1])<1.e-4);
                                MADNESS_ASSERT(std::fabs(z-c[2])<1.e-4);
                                MADNESS_ASSERT(std::fabs(z1-c[2])<1.e-4);

                                // regularize if a functor is given
                                if (vnuc_functor) val-=(*vnuc_functor)(c);
                                values(i,j,k)=val;
                            }
                        }
                    }
                } else {
                    MADNESS_EXCEPTION("only NDIM=3 in print_grid",0);
                }

                // insert the new leaf node
                const bool has_children=false;
                coeffT coeff=coeffT(this->values2coeffs(key,values),targs);
                nodeT node(coeff,has_children);
                coeffs.replace(key,node);
                const_cast<dcT&>(coeffs).send(key.parent(), &FunctionNode<T,NDIM>::set_has_children_recursive, coeffs, key.parent());
                ii++;
            }

            kfile.close();
            gfile.close();
            MADNESS_ASSERT(ii==nboxes);

        }


        /// read data from a grid

        /// @param[in] gridfile file with keys and grid points and values for each key
        /// @param[in] vnuc_functor subtract the values of this functor if regularization is needed
        template<size_t FDIM>
        typename std::enable_if<NDIM==FDIM>::type
        read_grid2(const std::string gridfile,
                   std::shared_ptr< FunctionFunctorInterface<double,NDIM> > vnuc_functor) {

            std::ifstream gfile(gridfile.c_str());
            std::string line;

            long ndata;
            if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 1st line of grid data",0);
            if (not (std::istringstream(line) >> ndata)) MADNESS_EXCEPTION("failed reading k",0);
            // skip over the second (header/comment) line
            if (not (std::getline(gfile,line))) MADNESS_EXCEPTION("failed reading 2nd line of grid data",0);

            // the quadrature points in simulation coordinates of the root node
            const Tensor<double> qx=cdata.quad_x;
            const size_t npt = qx.dim(0);

            // the number of coordinates (grid point tuples) per box ({x1},{x2},{x3},..,{xNDIM})
            long npoints=power<NDIM>(npt);
            // the number of boxes
            long nboxes=ndata/npoints;
            MADNESS_ASSERT(nboxes*npoints==ndata);
            print("reading ",nboxes,"boxes from file",gridfile);

            // these will be the data
            Tensor<T> values(cdata.vk,false);

            int ii=0;  // counts the boxes actually read
            std::string gline;
            // while (1) {
            while (std::getline(gfile,gline)) {

                double x1,y1,z1,val;

                // get the key
                long nn;
                Translation l1,l2,l3;
                // line looks like: # key: n l1 l2 l3
                gline.erase(0,7);
                std::stringstream(gline) >> nn >> l1 >> l2 >> l3;
                const Vector<Translation,3> ll{ l1,l2,l3 };
                Key<3> key(nn,ll);

                // this is borrowed from fcube: reconstruct the user-space
                // coordinates of the quadrature points of this box
                const Vector<Translation,3>& l = key.translation();
                const Level n = key.level();
                const double h = std::pow(0.5,double(n));
                coordT c; // will hold the point in user coordinates
                const Tensor<double>& cell_width = FunctionDefaults<NDIM>::get_cell_width();
                const Tensor<double>& cell = FunctionDefaults<NDIM>::get_cell();


                if (NDIM == 3) {
                    for (int i=0; i<npt; ++i) {
                        c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
                        for (int j=0; j<npt; ++j) {
                            c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
                            for (int k=0; k<npt; ++k) {
                                c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z

                                auto& success = std::getline(gfile,gline);
                                MADNESS_ASSERT(success);
                                std::istringstream(gline) >> x1 >> y1 >> z1 >> val;
                                // sanity check: coordinates read from file must
                                // match the recomputed quadrature points
                                MADNESS_ASSERT(std::fabs(x1-c[0])<1.e-4);
                                MADNESS_ASSERT(std::fabs(y1-c[1])<1.e-4);
                                MADNESS_ASSERT(std::fabs(z1-c[2])<1.e-4);

                                // regularize if a functor is given
                                if (vnuc_functor) val-=(*vnuc_functor)(c);
                                values(i,j,k)=val;
                            }
                        }
                    }
                } else {
                    MADNESS_EXCEPTION("only NDIM=3 in print_grid",0);
                }

                // insert the new leaf node
                const bool has_children=false;
                coeffT coeff=coeffT(this->values2coeffs(key,values),targs);
                nodeT node(coeff,has_children);
                coeffs.replace(key,node);
                const_cast<dcT&>(coeffs).send(key.parent(),
                                              &FunctionNode<T,NDIM>::set_has_children_recursive,
                                              coeffs, key.parent());
                ii++;
            }

            gfile.close();
            MADNESS_ASSERT(ii==nboxes);

        }


        /// Compute by projection the scaling function coeffs in specified box
        /// @param[in] key the key to the current function node (box)
        tensorT project(const keyT& key) const;

        /// Returns the truncation threshold according to truncate_method

        /// here is our handwaving argument:
        /// this threshold will give each FunctionNode an error of less than tol. The
        /// total error can then be as high as sqrt(#nodes) * tol. Therefore in order
        /// to account for higher dimensions: divide tol by about the root of number
        /// of siblings (2^NDIM) that have a large error when we refine along a deep
        /// branch of the tree.
        double truncate_tol(double tol, const keyT& key) const;


        /// Returns patch referring to coeffs of child in parent box
        /// @param[in] child the key to the child function node (box)
        std::vector<Slice> child_patch(const keyT& child) const;

        /// Projection with optional refinement w/ special points
        /// @param[in] key the key to the current function node (box)
        /// @param[in] do_refine should we continue refinement?
        /// @param[in] specialpts vector of special points in the function where we need
        ///            to refine at a much finer level
        void project_refine_op(const keyT& key, bool do_refine,
                               const std::vector<Vector<double,NDIM> >& specialpts);

        /// Compute the Legendre scaling functions for multiplication

        /// Evaluate parent polyn at quadrature points of a child.  The prefactor of
        /// 2^n/2 is included.  The tensor must be preallocated as phi(k,npt).
        /// Refer to the implementation notes for more info.
        /// @todo Robert please verify this comment. I don't understand this method.
        /// @param[in] np level of the parent function node (box)
        /// @param[in] nc level of the child function node (box)
        /// @param[in] lp translation of the parent function node (box)
        /// @param[in] lc translation of the child function node (box)
        /// @param[out] phi tensor of the legendre scaling functions
        void phi_for_mul(Level np, Translation lp, Level nc, Translation lc, Tensor<double>& phi) const;

        /// Directly project parent coeffs to child coeffs

        /// Currently used by diff, but other uses can be anticipated

        /// @todo is this documentation correct?
        /// @param[in] child the key whose coeffs we are requesting
        /// @param[in] parent the (leaf) key of our function
        /// @param[in] s the (leaf) coeffs belonging to parent
        /// @return coeffs
        const coeffT parent_to_child(const coeffT& s, const keyT& parent, const keyT& child) const;

        /// Directly project parent NS coeffs to child NS coeffs

        /// return the NS coefficients if parent and child are the same,
        /// or construct sum coeffs from the parents and "add" zero wavelet coeffs
        /// @param[in] child the key whose coeffs we are requesting
        /// @param[in] parent the (leaf) key of our function
        /// @param[in] coeff the (leaf) coeffs belonging to parent
        /// @return coeffs in NS form
        coeffT parent_to_child_NS(const keyT& child, const keyT& parent,
                                  const coeffT& coeff) const;

        /// Get the scaling function coeffs at level n starting from NS form
        // N=2^n, M=N/q, q must be power of 2
        // q=0 return coeffs [N,k] for direct sum
        // q>0 return coeffs [k,q,M] for fft sum
        tensorT coeffs_for_jun(Level n, long q=0);

        /// Return the values when given the coeffs in scaling function basis
        /// @param[in] key the key of the function node (box)
        /// @param[in] coeff the tensor of scaling function coefficients for function node (box)
        /// @return function values for function node (box)
        template <typename Q>
        GenTensor<Q> coeffs2values(const keyT& key, const GenTensor<Q>& coeff) const {
            // PROFILE_MEMBER_FUNC(FunctionImpl);  // Too fine grain for routine profiling
            // scale by 2^(n*NDIM/2)/sqrt(cell volume) to account for the level-n support
            double scale = pow(2.0,0.5*NDIM*key.level())/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
            return transform(coeff,cdata.quad_phit).scale(scale);
        }
        /// convert S or NS coeffs to values on a 2k grid of the children

        /// equivalent to unfiltering the NS coeffs and then converting all child S-coeffs
        /// to values in their respective boxes. If only S coeffs are provided d coeffs are
        /// assumed to be zero. Reverse operation to values2NScoeffs().
        /// @param[in] key the key of the current S or NS coeffs, level n
        /// @param[in] coeff coeffs in S or NS form; if S then d coeffs are assumed zero
        /// @param[in] s_only sanity check to avoid unintended discard of d coeffs
        /// @return function values on the quadrature points of the children of child (!)
        template <typename Q>
        GenTensor<Q> NScoeffs2values(const keyT& key, const GenTensor<Q>& coeff,
                                     const bool s_only) const {
            // PROFILE_MEMBER_FUNC(FunctionImpl);  // Too fine grain for routine profiling

            // sanity checks: dimension k means S-only, dimension 2k means NS
            MADNESS_ASSERT((coeff.dim(0)==this->get_k()) == s_only);
            MADNESS_ASSERT((coeff.dim(0)==this->get_k()) or (coeff.dim(0)==2*this->get_k()));

            // this is a block-diagonal matrix with the quadrature points on the diagonal
            Tensor<double> quad_phit_2k(2*cdata.k,2*cdata.npt);
            quad_phit_2k(cdata.s[0],cdata.s[0])=cdata.quad_phit;
            quad_phit_2k(cdata.s[1],cdata.s[1])=cdata.quad_phit;

            // the transformation matrix unfilters (cdata.hg) and transforms to values in one step
            const Tensor<double> transf = (s_only)
                    ? inner(cdata.hg(Slice(0,k-1),_),quad_phit_2k)  // S coeffs
                    : inner(cdata.hg,quad_phit_2k);                 // NS coeffs

            // increment the level since the coeffs2values part happens on level n+1
            const double scale = pow(2.0,0.5*NDIM*(key.level()+1))/
                    sqrt(FunctionDefaults<NDIM>::get_cell_volume());

            return transform(coeff,transf).scale(scale);
        }

        /// Compute the function values for multiplication

        /// Given S or NS coefficients from a parent cell, compute the value of
        /// the functions at the quadrature points of a child
        /// currently restricted to special cases
        /// @param[in] child key of the box in which we compute values
        /// @param[in] parent key of the parent box holding the coeffs
        /// @param[in] coeff coeffs of the parent box
        /// @param[in] s_only sanity check to avoid unintended discard of d coeffs
        /// @return function values on the quadrature points of the children of child (!)
        template <typename Q>
        GenTensor<Q> NS_fcube_for_mul(const keyT& child, const keyT& parent,
                                      const GenTensor<Q>& coeff, const bool s_only) const {
            // PROFILE_MEMBER_FUNC(FunctionImpl);  // Too fine grain for routine profiling

            // sanity checks: dimension k means S-only, dimension 2k means NS
            MADNESS_ASSERT((coeff.dim(0)==this->get_k()) == s_only);
            MADNESS_ASSERT((coeff.dim(0)==this->get_k()) or (coeff.dim(0)==2*this->get_k()));

            // fast return if possible
            // if (child.level()==parent.level()) return NScoeffs2values(child,coeff,s_only);

            if (s_only) {

                // per-dimension transformation matrices
                Tensor<double> quad_phi[NDIM];
                // tmp tensor
                Tensor<double> phi1(cdata.k,cdata.npt);

                for (std::size_t d=0; d<NDIM; ++d) {

                    // input is S coeffs (dimension k), output is values on 2*npt grid points
                    quad_phi[d]=Tensor<double>(cdata.k,2*cdata.npt);

                    // for both children of "child" evaluate the Legendre polynomials
                    // first the left child on level n+1 and translations 2l
                    phi_for_mul(parent.level(),parent.translation()[d],
                                child.level()+1, 2*child.translation()[d], phi1);
                    quad_phi[d](_,Slice(0,k-1))=phi1;

                    // next the right child on level n+1 and translations 2l+1
                    phi_for_mul(parent.level(),parent.translation()[d],
                                child.level()+1, 2*child.translation()[d]+1, phi1);
                    quad_phi[d](_,Slice(k,2*k-1))=phi1;
                }

                const double scale = 1.0/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
                return general_transform(coeff,quad_phi).scale(scale);
            }
            // the NS branch is not implemented; callers must pass s_only=true
            MADNESS_EXCEPTION("you should not be here in NS_fcube_for_mul",1);
            return GenTensor<Q>();
        }

        /// convert function values of the a child generation directly to NS coeffs

        /// equivalent to converting the function values to 2^NDIM S coeffs and then
        /// filtering them to NS coeffs. Reverse operation to NScoeffs2values().
        /// @param[in] key key of the parent of the generation
        /// @param[in] values tensor holding function values of the 2^NDIM children of key
        /// @return NS coeffs belonging to key
        template <typename Q>
        GenTensor<Q> values2NScoeffs(const keyT& key, const GenTensor<Q>& values) const {
            //PROFILE_MEMBER_FUNC(FunctionImpl);  // Too fine grain for routine profiling

            // sanity checks
            MADNESS_ASSERT(values.dim(0)==2*this->get_k());

            // this is a block-diagonal matrix with the quadrature points on the diagonal
            Tensor<double> quad_phit_2k(2*cdata.npt,2*cdata.k);
            quad_phit_2k(cdata.s[0],cdata.s[0])=cdata.quad_phiw;
            quad_phit_2k(cdata.s[1],cdata.s[1])=cdata.quad_phiw;

            // the transformation matrix unfilters (cdata.hg) and transforms to values in one step
            const Tensor<double> transf=inner(quad_phit_2k,cdata.hgT);

            // increment the level since the values2coeffs part happens on level n+1
1696 const double scale = pow(0.5,0.5*NDIM*(key.level()+1)) 1697 *sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 1698 1699 return transform(values,transf).scale(scale); 1700 } 1701 1702 /// Return the scaling function coeffs when given the function values at the quadrature points 1703 /// @param[in] key the key of the function node (box) 1704 /// @return function values for function node (box) 1705 template <typename Q> coeffs2values(const keyT & key,const Tensor<Q> & coeff)1706 Tensor<Q> coeffs2values(const keyT& key, const Tensor<Q>& coeff) const { 1707 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling 1708 double scale = pow(2.0,0.5*NDIM*key.level())/sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 1709 return transform(coeff,cdata.quad_phit).scale(scale); 1710 } 1711 1712 template <typename Q> values2coeffs(const keyT & key,const GenTensor<Q> & values)1713 GenTensor<Q> values2coeffs(const keyT& key, const GenTensor<Q>& values) const { 1714 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling 1715 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 1716 return transform(values,cdata.quad_phiw).scale(scale); 1717 } 1718 1719 template <typename Q> values2coeffs(const keyT & key,const Tensor<Q> & values)1720 Tensor<Q> values2coeffs(const keyT& key, const Tensor<Q>& values) const { 1721 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling 1722 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 1723 return transform(values,cdata.quad_phiw).scale(scale); 1724 } 1725 1726 /// Compute the function values for multiplication 1727 1728 /// Given coefficients from a parent cell, compute the value of 1729 /// the functions at the quadrature points of a child 1730 /// @param[in] child the key for the child function node (box) 1731 /// @param[in] parent the key for the parent function node (box) 1732 /// @param[in] 
coeff the coefficients of scaling function basis of the parent box 1733 template <typename Q> fcube_for_mul(const keyT & child,const keyT & parent,const Tensor<Q> & coeff)1734 Tensor<Q> fcube_for_mul(const keyT& child, const keyT& parent, const Tensor<Q>& coeff) const { 1735 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling 1736 if (child.level() == parent.level()) { 1737 return coeffs2values(parent, coeff); 1738 } 1739 else if (child.level() < parent.level()) { 1740 MADNESS_EXCEPTION("FunctionImpl: fcube_for_mul: child-parent relationship bad?",0); 1741 } 1742 else { 1743 Tensor<double> phi[NDIM]; 1744 for (std::size_t d=0; d<NDIM; ++d) { 1745 phi[d] = Tensor<double>(cdata.k,cdata.npt); 1746 phi_for_mul(parent.level(),parent.translation()[d], 1747 child.level(), child.translation()[d], phi[d]); 1748 } 1749 return general_transform(coeff,phi).scale(1.0/sqrt(FunctionDefaults<NDIM>::get_cell_volume()));; 1750 } 1751 } 1752 1753 1754 /// Compute the function values for multiplication 1755 1756 /// Given coefficients from a parent cell, compute the value of 1757 /// the functions at the quadrature points of a child 1758 /// @param[in] child the key for the child function node (box) 1759 /// @param[in] parent the key for the parent function node (box) 1760 /// @param[in] coeff the coefficients of scaling function basis of the parent box 1761 template <typename Q> fcube_for_mul(const keyT & child,const keyT & parent,const GenTensor<Q> & coeff)1762 GenTensor<Q> fcube_for_mul(const keyT& child, const keyT& parent, const GenTensor<Q>& coeff) const { 1763 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling 1764 if (child.level() == parent.level()) { 1765 return coeffs2values(parent, coeff); 1766 } 1767 else if (child.level() < parent.level()) { 1768 MADNESS_EXCEPTION("FunctionImpl: fcube_for_mul: child-parent relationship bad?",0); 1769 } 1770 else { 1771 Tensor<double> phi[NDIM]; 1772 for (size_t d=0; d<NDIM; d++) { 1773 
phi[d] = Tensor<double>(cdata.k,cdata.npt); 1774 phi_for_mul(parent.level(),parent.translation()[d], 1775 child.level(), child.translation()[d], phi[d]); 1776 } 1777 return general_transform(coeff,phi).scale(1.0/sqrt(FunctionDefaults<NDIM>::get_cell_volume())); 1778 } 1779 } 1780 1781 1782 /// Functor for the mul method 1783 template <typename L, typename R> do_mul(const keyT & key,const Tensor<L> & left,const std::pair<keyT,Tensor<R>> & arg)1784 void do_mul(const keyT& key, const Tensor<L>& left, const std::pair< keyT, Tensor<R> >& arg) { 1785 // PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling 1786 const keyT& rkey = arg.first; 1787 const Tensor<R>& rcoeff = arg.second; 1788 //madness::print("do_mul: r", rkey, rcoeff.size()); 1789 Tensor<R> rcube = fcube_for_mul(key, rkey, rcoeff); 1790 //madness::print("do_mul: l", key, left.size()); 1791 Tensor<L> lcube = fcube_for_mul(key, key, left); 1792 1793 Tensor<T> tcube(cdata.vk,false); 1794 TERNARY_OPTIMIZED_ITERATOR(T, tcube, L, lcube, R, rcube, *_p0 = *_p1 * *_p2;); 1795 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 1796 tcube = transform(tcube,cdata.quad_phiw).scale(scale); 1797 coeffs.replace(key, nodeT(coeffT(tcube,targs),false)); 1798 } 1799 1800 1801 /// multiply the values of two coefficient tensors using a custom number of grid points 1802 1803 /// note both coefficient tensors have to refer to the same key! 
1804 /// @param[in] c1 a tensor holding coefficients 1805 /// @param[in] c2 another tensor holding coeffs 1806 /// @param[in] npt number of grid points (optional, default is cdata.npt) 1807 /// @return coefficient tensor holding the product of the values of c1 and c2 1808 template<typename R> mul(const Tensor<T> & c1,const Tensor<R> & c2,const int npt,const keyT & key)1809 Tensor<TENSOR_RESULT_TYPE(T,R)> mul(const Tensor<T>& c1, const Tensor<R>& c2, 1810 const int npt, const keyT& key) const { 1811 typedef TENSOR_RESULT_TYPE(T,R) resultT; 1812 1813 const FunctionCommonData<T,NDIM>& cdata2=FunctionCommonData<T,NDIM>::get(npt); 1814 1815 // construct a tensor with the npt coeffs 1816 Tensor<T> c11(cdata2.vk), c22(cdata2.vk); 1817 c11(this->cdata.s0)=c1; 1818 c22(this->cdata.s0)=c2; 1819 1820 // it's sufficient to scale once 1821 double scale = pow(2.0,0.5*NDIM*key.level())/sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 1822 Tensor<T> c1value=transform(c11,cdata2.quad_phit).scale(scale); 1823 Tensor<R> c2value=transform(c22,cdata2.quad_phit); 1824 Tensor<resultT> resultvalue(cdata2.vk,false); 1825 TERNARY_OPTIMIZED_ITERATOR(resultT, resultvalue, T, c1value, R, c2value, *_p0 = *_p1 * *_p2;); 1826 1827 Tensor<resultT> result=transform(resultvalue,cdata2.quad_phiw); 1828 1829 // return a copy of the slice to have the tensor contiguous 1830 return copy(result(this->cdata.s0)); 1831 } 1832 1833 1834 /// Functor for the binary_op method 1835 template <typename L, typename R, typename opT> do_binary_op(const keyT & key,const Tensor<L> & left,const std::pair<keyT,Tensor<R>> & arg,const opT & op)1836 void do_binary_op(const keyT& key, const Tensor<L>& left, 1837 const std::pair< keyT, Tensor<R> >& arg, 1838 const opT& op) { 1839 //PROFILE_MEMBER_FUNC(FunctionImpl); // Too fine grain for routine profiling 1840 const keyT& rkey = arg.first; 1841 const Tensor<R>& rcoeff = arg.second; 1842 Tensor<R> rcube = fcube_for_mul(key, rkey, rcoeff); 1843 Tensor<L> lcube = 
fcube_for_mul(key, key, left); 1844 1845 Tensor<T> tcube(cdata.vk,false); 1846 op(key, tcube, lcube, rcube); 1847 double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 1848 tcube = transform(tcube,cdata.quad_phiw).scale(scale); 1849 coeffs.replace(key, nodeT(coeffT(tcube,targs),false)); 1850 } 1851 1852 /// Invoked by result to perform result += alpha*left+beta*right in wavelet basis 1853 1854 /// Does not assume that any of result, left, right have the same distribution. 1855 /// For most purposes result will start as an empty so actually are implementing 1856 /// out of place gaxpy. If all functions have the same distribution there is 1857 /// no communication except for the optional fence. 1858 template <typename L, typename R> gaxpy(T alpha,const FunctionImpl<L,NDIM> & left,T beta,const FunctionImpl<R,NDIM> & right,bool fence)1859 void gaxpy(T alpha, const FunctionImpl<L,NDIM>& left, 1860 T beta, const FunctionImpl<R,NDIM>& right, bool fence) { 1861 // Loop over local nodes in both functions. Add in left and subtract right. 1862 // Not that efficient in terms of memory bandwidth but ensures we do 1863 // not miss any nodes. 
1864 typename FunctionImpl<L,NDIM>::dcT::const_iterator left_end = left.coeffs.end(); 1865 for (typename FunctionImpl<L,NDIM>::dcT::const_iterator it=left.coeffs.begin(); 1866 it!=left_end; 1867 ++it) { 1868 const keyT& key = it->first; 1869 const typename FunctionImpl<L,NDIM>::nodeT& other_node = it->second; 1870 coeffs.send(key, &nodeT:: template gaxpy_inplace<T,L>, 1.0, other_node, alpha); 1871 } 1872 typename FunctionImpl<R,NDIM>::dcT::const_iterator right_end = right.coeffs.end(); 1873 for (typename FunctionImpl<R,NDIM>::dcT::const_iterator it=right.coeffs.begin(); 1874 it!=right_end; 1875 ++it) { 1876 const keyT& key = it->first; 1877 const typename FunctionImpl<L,NDIM>::nodeT& other_node = it->second; 1878 coeffs.send(key, &nodeT:: template gaxpy_inplace<T,R>, 1.0, other_node, beta); 1879 } 1880 if (fence) 1881 world.gop.fence(); 1882 } 1883 1884 /// Unary operation applied inplace to the coefficients WITHOUT refinement, optional fence 1885 /// @param[in] op the unary operator for the coefficients 1886 template <typename opT> unary_op_coeff_inplace(const opT & op,bool fence)1887 void unary_op_coeff_inplace(const opT& op, bool fence) { 1888 typename dcT::iterator end = coeffs.end(); 1889 for (typename dcT::iterator it=coeffs.begin(); it!=end; ++it) { 1890 const keyT& parent = it->first; 1891 nodeT& node = it->second; 1892 if (node.has_coeff()) { 1893 // op(parent, node.coeff()); 1894 TensorArgs full(-1.0,TT_FULL); 1895 change_tensor_type(node.coeff(),full); 1896 op(parent, node.coeff().full_tensor()); 1897 change_tensor_type(node.coeff(),targs); 1898 // op(parent,node); 1899 } 1900 } 1901 if (fence) 1902 world.gop.fence(); 1903 } 1904 1905 /// Unary operation applied inplace to the coefficients WITHOUT refinement, optional fence 1906 /// @param[in] op the unary operator for the coefficients 1907 template <typename opT> unary_op_node_inplace(const opT & op,bool fence)1908 void unary_op_node_inplace(const opT& op, bool fence) { 1909 typename dcT::iterator end = 
coeffs.end(); 1910 for (typename dcT::iterator it=coeffs.begin(); it!=end; ++it) { 1911 const keyT& parent = it->first; 1912 nodeT& node = it->second; 1913 op(parent, node); 1914 } 1915 if (fence) 1916 world.gop.fence(); 1917 } 1918 1919 /// Integrate over one particle of a two particle function and get a one particle function 1920 /// bsp \int g(1,2) \delta(2-1) d2 = f(1) 1921 /// The overall dimension of g should be even 1922 1923 /// The operator 1924 template<std::size_t LDIM> dirac_convolution_op(const keyT & key,const nodeT & node,FunctionImpl<T,LDIM> * f)1925 void dirac_convolution_op(const keyT &key, const nodeT &node, FunctionImpl<T,LDIM>* f) const { 1926 // fast return if the node has children (not a leaf node) 1927 if(node.has_children()) return; 1928 1929 const implT* g=this; 1930 1931 // break the 6D key into two 3D keys (may also work for every even dimension) 1932 Key<LDIM> key1, key2; 1933 key.break_apart(key1,key2); 1934 1935 // get the coefficients of the 6D function g 1936 const coeffT& g_coeff = node.coeff(); 1937 1938 // get the values of the 6D function g 1939 coeffT g_values = g->coeffs2values(key,g_coeff); 1940 1941 // Determine rank and k 1942 const long rank=g_values.rank(); 1943 const long maxk=f->get_k(); 1944 MADNESS_ASSERT(maxk==g_coeff.dim(0)); 1945 1946 // get tensors for particle 1 and 2 (U and V in SVD) 1947 tensorT vec1=copy(g_values.config().ref_vector(0).reshape(rank,maxk,maxk,maxk)); 1948 tensorT vec2=g_values.config().ref_vector(1).reshape(rank,maxk,maxk,maxk); 1949 tensorT result(maxk,maxk,maxk); // should give zero tensor 1950 // Multiply the values of each U and V vector 1951 for (long i=0; i<rank; ++i) { 1952 tensorT c1=vec1(Slice(i,i),_,_,_); // shallow copy (!) 
1953 tensorT c2=vec2(Slice(i,i),_,_,_); 1954 c1.emul(c2); // this changes vec1 because of shallow copy, but not the g function because of the deep copy made above 1955 double singular_value_i = g_values.config().weights(i); 1956 result += (singular_value_i*c1); 1957 } 1958 1959 // accumulate coefficients (since only diagonal boxes are used the coefficients get just replaced, but accumulate is needed to create the right tree structure 1960 tensorT f_coeff = f->values2coeffs(key1,result); 1961 f->coeffs.task(key1, &FunctionNode<T,LDIM>::accumulate2, f_coeff, f->coeffs, key1, TaskAttributes::hipri()); 1962 // coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest, TaskAttributes::hipri()); 1963 1964 1965 return; 1966 } 1967 1968 1969 template<std::size_t LDIM> do_dirac_convolution(FunctionImpl<T,LDIM> * f,bool fence)1970 void do_dirac_convolution(FunctionImpl<T,LDIM>* f, bool fence) const { 1971 typename dcT::const_iterator end = this->coeffs.end(); 1972 for (typename dcT::const_iterator it=this->coeffs.begin(); it!=end; ++it) { 1973 // looping through all the leaf(!) 
coefficients in the NDIM function ("this") 1974 const keyT& key = it->first; 1975 const FunctionNode<T,NDIM>& node = it->second; 1976 if (node.is_leaf()) { 1977 // only process the diagonal boxes 1978 Key<LDIM> key1, key2; 1979 key.break_apart(key1,key2); 1980 if(key1 == key2){ 1981 ProcessID p = coeffs.owner(key); 1982 woT::task(p, &implT:: template dirac_convolution_op<LDIM>, key, node, f); 1983 } 1984 } 1985 } 1986 world.gop.fence(); // fence is necessary if trickle down is used afterwards 1987 // trickle down and undo redundand shouldnt change anything if only the diagonal elements are considered above -> check this 1988 f->trickle_down(true); // fence must be true otherwise undo_redundant will have trouble 1989 f->undo_redundant(true); 1990 f->verify_tree(); 1991 //if (fence) world.gop.fence(); // unnecessary, fence is activated in undo_redundant 1992 1993 } 1994 1995 1996 /// Unary operation applied inplace to the coefficients WITHOUT refinement, optional fence 1997 /// @param[in] op the unary operator for the coefficients 1998 template <typename opT> flo_unary_op_node_inplace(const opT & op,bool fence)1999 void flo_unary_op_node_inplace(const opT& op, bool fence) { 2000 typedef Range<typename dcT::iterator> rangeT; 2001 // typedef do_unary_op_value_inplace<opT> xopT; 2002 world.taskq.for_each<rangeT,opT>(rangeT(coeffs.begin(), coeffs.end()), op); 2003 if (fence) world.gop.fence(); 2004 } 2005 2006 /// Unary operation applied inplace to the coefficients WITHOUT refinement, optional fence 2007 /// @param[in] op the unary operator for the coefficients 2008 template <typename opT> flo_unary_op_node_inplace(const opT & op,bool fence)2009 void flo_unary_op_node_inplace(const opT& op, bool fence) const { 2010 typedef Range<typename dcT::const_iterator> rangeT; 2011 // typedef do_unary_op_value_inplace<opT> xopT; 2012 world.taskq.for_each<rangeT,opT>(rangeT(coeffs.begin(), coeffs.end()), op); 2013 if (fence) 2014 world.gop.fence(); 2015 } 2016 2017 /// truncate tree 
/// at a certain level
/// @param[in] max_level truncate tree below this level
void erase(const Level& max_level);

/// Returns some asymmetry measure ... no comms
double check_symmetry_local() const;

/// given an NS tree resulting from a convolution, truncate leafs if appropriate

/// A leaf whose wavelet (d) part is below the truncation threshold is
/// replaced by its sum (s) part only.
struct do_truncate_NS_leafs {
    typedef Range<typename dcT::iterator> rangeT;
    const implT* f;     // for calling its member functions

    do_truncate_NS_leafs(const implT* f) : f(f) {}

    bool operator()(typename rangeT::iterator& it) const {

        const keyT& key = it->first;
        nodeT& node = it->second;

        if (node.is_leaf() and node.coeff().has_data()) {
            // error = norm of the d coeffs (everything outside the s0 block)
            coeffT d = copy(node.coeff());
            d(f->cdata.s0)=0.0;
            const double error=d.normf();
            const double tol=f->truncate_tol(f->get_thresh(),key);
            if (error<tol) node.coeff()=copy(node.coeff()(f->cdata.s0));
        }
        return true;
    }
    template <typename Archive> void serialize(const Archive& ar) {}

};

/// remove all coefficients of internal nodes

/// presumably to switch from redundant to reconstructed state
struct remove_internal_coeffs {
    typedef Range<typename dcT::iterator> rangeT;

    /// constructor need impl for cdata
    remove_internal_coeffs() {}

    bool operator()(typename rangeT::iterator& it) const {

        nodeT& node = it->second;
        if (node.has_children()) node.clear_coeff();
        return true;
    }
    template <typename Archive> void serialize(const Archive& ar) {}

};


/// keep only the sum coefficients in each node
struct do_keep_sum_coeffs {
    typedef Range<typename dcT::iterator> rangeT;
    implT* impl;

    /// constructor need impl for cdata
    do_keep_sum_coeffs(implT* impl) : impl(impl) {}

    bool operator()(typename rangeT::iterator& it) const {

        nodeT& node = it->second;
        // copy out the s0 block so the new coeff tensor is contiguous
        coeffT s=copy(node.coeff()(impl->cdata.s0));
        node.coeff()=s;
        return true;
    }
    template <typename Archive> void serialize(const Archive& ar) {}

};


/// reduce the rank of the nodes, optional fence
struct do_reduce_rank {
    typedef Range<typename dcT::iterator> rangeT;

    // threshold for rank reduction / SVD truncation
    TensorArgs args;

    // constructor takes target precision
    do_reduce_rank() {}
    do_reduce_rank(const TensorArgs& targs) : args(targs) {}
    do_reduce_rank(const double& thresh) {
        args.thresh=thresh;
    }

    //
    bool operator()(typename rangeT::iterator& it) const {

        nodeT& node = it->second;
        node.reduceRank(args.thresh);
        return true;
    }
    template <typename Archive> void serialize(const Archive& ar) {}
};



/// check symmetry wrt particle exchange

/// NOTE(review): the exchange map below is hard-coded for NDIM==6
/// (indices 0..5); for other dimensions the map[] writes are out of
/// bounds -- confirm callers restrict this to 6D functions.
struct do_check_symmetry_local {
    typedef Range<typename dcT::const_iterator> rangeT;
    const implT* f;
    do_check_symmetry_local() {}
    do_check_symmetry_local(const implT& f) : f(&f) {}

    /// return the norm of the difference of this node and its "mirror" node
    double operator()(typename rangeT::iterator& it) const {

        const keyT& key = it->first;
        const nodeT& fnode = it->second;

        // skip internal nodes
        if (fnode.has_children()) return 0.0;

        // works only in serial runs
        if (f->world.size()>1) return 0.0;

        // exchange particles
        std::vector<long> map(NDIM);
        map[0]=3; map[1]=4; map[2]=5;
        map[3]=0; map[4]=1; map[5]=2;

        // make mapped key
        Vector<Translation,NDIM> l;
        for (std::size_t i=0; i<NDIM; ++i) l[map[i]] = key.translation()[i];
        const keyT mapkey(key.level(),l);

        double norm=0.0;


        // hope it's local
        if (f->get_coeffs().probe(mapkey)) {
            MADNESS_ASSERT(f->get_coeffs().probe(mapkey));
            const nodeT& mapnode=f->get_coeffs().find(mapkey).get()->second;

            bool have_c1=fnode.coeff().has_data() and fnode.coeff().config().has_data();
            bool have_c2=mapnode.coeff().has_data() and mapnode.coeff().config().has_data();

            if (have_c1 and have_c2) {
                tensorT c1=fnode.coeff().full_tensor_copy();
                tensorT c2=mapnode.coeff().full_tensor_copy();
                c2 = copy(c2.mapdim(map));
                norm=(c1-c2).normf();
            } else if (have_c1) {
                tensorT c1=fnode.coeff().full_tensor_copy();
                norm=c1.normf();
            } else if (have_c2) {
                tensorT c2=mapnode.coeff().full_tensor_copy();
                norm=c2.normf();
            } else {
                norm=0.0;
            }
        } else {
            norm=fnode.coeff().normf();
        }
        return norm*norm;
    }

    /// reduction for the range-based evaluation: plain sum
    double operator()(double a, double b) const {
        return (a+b);
    }

    template <typename Archive> void serialize(const Archive& ar) {
        MADNESS_EXCEPTION("no serialization of do_check_symmetry yet",1);
    }


};

/// merge the coefficent boxes of this into other's tree

/// no comm, and the tree should be in an consistent state by virtue
/// of FunctionNode::gaxpy_inplace
template<typename Q, typename R>
struct do_merge_trees {
    typedef Range<typename dcT::const_iterator> rangeT;
    FunctionImpl<Q,NDIM>* other;
    T alpha;
    R beta;
    do_merge_trees() {}
    do_merge_trees(const T alpha, const R beta, FunctionImpl<Q,NDIM>& other)
        : other(&other), alpha(alpha), beta(beta) {}

    /// perform other = alpha*this + beta*other for the node at it
    bool operator()(typename rangeT::iterator& it) const {

        const keyT& key = it->first;
        const nodeT& fnode = it->second;

        // if other's node exists: add this' coeffs to it
        // otherwise insert this' node into other's tree
        typename dcT::accessor acc;
        if (other->get_coeffs().find(acc,key)) {
            nodeT& gnode=acc->second;
            gnode.gaxpy_inplace(beta,fnode,alpha);
        } else {
            nodeT gnode=fnode;
            gnode.scale(alpha);
            other->get_coeffs().replace(key,gnode);
        }
        return true;
    }

    template <typename Archive> void serialize(const Archive& ar) {
        MADNESS_EXCEPTION("no serialization of do_merge_trees",1);
    }
};


/// map this on f

/// permutes the dimensions of every node according to map and writes
/// the result into f's tree under the correspondingly permuted key
struct do_mapdim {
    typedef Range<typename dcT::iterator> rangeT;

    std::vector<long> map;
    implT* f;

    do_mapdim() : f(0) {};
    do_mapdim(const std::vector<long> map, implT& f) : map(map), f(&f) {}

    bool operator()(typename rangeT::iterator& it) const {

        const keyT& key = it->first;
        const nodeT& node = it->second;

        // permute the translation vector the same way as the dimensions
        Vector<Translation,NDIM> l;
        for (std::size_t i=0; i<NDIM; ++i) l[map[i]] = key.translation()[i];
        tensorT c = node.coeff().full_tensor_copy();
        if (c.size()) c = copy(c.mapdim(map));
        coeffT cc(c,f->get_tensor_args());
        f->get_coeffs().replace(keyT(key.level(),l), nodeT(cc,node.has_children()));

        return true;
    }
    template <typename Archive> void serialize(const Archive& ar) {
        MADNESS_EXCEPTION("no serialization of do_mapdim",1);
    }

};

/// "put" this on g

/// adds this function's coeffs into g's tree; nodes missing in g are
/// copied over verbatim
struct do_average {
    typedef Range<typename dcT::const_iterator> rangeT;

    implT* g;

    do_average() {}
    do_average(implT& g) : g(&g) {}

    /// iterator it points to this
    bool operator()(typename rangeT::iterator& it) const {

        const keyT& key = it->first;
        const nodeT& fnode = it->second;

        // fast return if rhs has no coeff here
        if (fnode.has_coeff()) {

            // check if there is a node already existing
            typename dcT::accessor acc;
            if (g->get_coeffs().find(acc,key)) {
                nodeT& gnode=acc->second;
                if (gnode.has_coeff()) gnode.coeff()+=fnode.coeff();
            } else {
                g->get_coeffs().replace(key,fnode);
            }
        }

        return true;
    }
    template <typename Archive> void serialize(const Archive& ar) {}
};

/// change representation of nodes' coeffs to low rank, optional fence
struct do_change_tensor_type {
    typedef Range<typename dcT::iterator> rangeT;

    // threshold for rank reduction / SVD truncation
    TensorArgs targs;

    // constructor takes target precision
    do_change_tensor_type() {}
    do_change_tensor_type(const TensorArgs& targs) : targs(targs) {}

    //
    bool operator()(typename rangeT::iterator& it) const {

        nodeT& node = it->second;
        change_tensor_type(node.coeff(),targs);
        return true;

    }
    template <typename Archive> void serialize(const Archive& ar) {}
};

/// merge a node's accumulation buffer into its coeffs (see FunctionNode)
struct do_consolidate_buffer {
    typedef Range<typename dcT::iterator> rangeT;

    // threshold for rank reduction / SVD truncation
    TensorArgs targs;

    // constructor takes target precision
    do_consolidate_buffer() {}
    do_consolidate_buffer(const TensorArgs& targs) : targs(targs) {}
    bool operator()(typename rangeT::iterator& it) const {
        it->second.consolidate_buffer(targs);
        return true;
    }
    template <typename Archive> void serialize(const Archive& ar) {}
};



/// apply op to the function *values* of a node, in place

/// converts the node's coeffs to values, applies op, and transforms back
template <typename opT>
struct do_unary_op_value_inplace {
    typedef Range<typename dcT::iterator> rangeT;
    implT* impl;
    opT op;
    do_unary_op_value_inplace(implT* impl, const opT& op) : impl(impl), op(op) {}
    bool operator()(typename rangeT::iterator& it) const {
        const keyT& key = it->first;
        nodeT& node = it->second;
        if (node.has_coeff()) {
            // op works on the full-rank representation
            const TensorArgs full_args(-1.0,TT_FULL);
            change_tensor_type(node.coeff(),full_args);
            tensorT& t= node.coeff().full_tensor();
            //double before = t.normf();
            tensorT values = impl->fcube_for_mul(key, key, t);
            op(key, values);
            double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
            t = transform(values,impl->cdata.quad_phiw).scale(scale);
            node.coeff()=coeffT(t,impl->get_tensor_args());
            //double after = t.normf();
            //madness::print("XOP:", key, before, after);
        }
        return true;
    }
    template <typename Archive> void serialize(const Archive& ar) {}
};

template <typename Q, typename R>
/// @todo I don't know what this does other than a trasform
void vtransform_doit(const std::shared_ptr< FunctionImpl<R,NDIM> >& right,
                     const Tensor<Q>& c,
                     const std::vector< std::shared_ptr< FunctionImpl<T,NDIM> > >& vleft,
                     double tol) {
    // To reduce crunch on vectors being transformed each task
    // does them in a random order
    // NOTE(review): this swap-with-random-index loop is a biased shuffle
    // (not Fisher-Yates); presumably only decorrelation is needed, and
    // RandomValue<int>() is assumed non-negative -- verify
    std::vector<unsigned int> ind(vleft.size());
    for (unsigned int i=0; i<vleft.size(); ++i) {
        ind[i] = i;
    }
    for (unsigned int i=0; i<vleft.size(); ++i) {
        unsigned int j = RandomValue<int>()%vleft.size();
        std::swap(ind[i],ind[j]);
    }

    typename FunctionImpl<R,NDIM>::dcT::const_iterator end = right->coeffs.end();
    for (typename FunctionImpl<R,NDIM>::dcT::const_iterator it=right->coeffs.begin(); it != end; ++it) {
        if (it->second.has_coeff()) {
            const Key<NDIM>& key = it->first;
            const GenTensor<R>& r = it->second.coeff();
            double norm = r.normf();
            double keytol = truncate_tol(tol,key);

            for (unsigned int j=0; j<vleft.size(); ++j) {
                unsigned int i = ind[j];    // Random permutation
                if (std::abs(norm*c(i)) > keytol) {
                    implT* left = vleft[i].get();
                    typename dcT::accessor acc;
                    bool newnode = left->coeffs.insert(acc,key);
                    if (newnode && key.level()>0) {
                        // make sure the parent chain exists in the target tree
                        Key<NDIM> parent = key.parent();
                        if (left->coeffs.is_local(parent))
                            left->coeffs.send(parent, &nodeT::set_has_children_recursive, left->coeffs, parent);
                        else
                            left->coeffs.task(parent, &nodeT::set_has_children_recursive, left->coeffs, parent);

                    }
                    nodeT& node = acc->second;
                    if (!node.has_coeff())
                        node.set_coeff(coeffT(cdata.v2k,targs));
                    coeffT& t = node.coeff();
                    t.gaxpy(1.0, r, c(i));
                }
            }
        }
    }
}

/// Refine multiple functions down to the same finest level

/// @param v the vector of functions we are refining.
/// @param key the current node.
/// @param c the vector of coefficients passed from above.
2409 void refine_to_common_level(const std::vector<FunctionImpl<T,NDIM>*>& v, 2410 const std::vector<tensorT>& c, 2411 const keyT key); 2412 2413 /// Inplace operate on many functions (impl's) with an operator within a certain box 2414 /// @param[in] key the key of the current function node (box) 2415 /// @param[in] op the operator 2416 /// @param[in] v the vector of function impl's on which to be operated 2417 template <typename opT> multiop_values_doit(const keyT & key,const opT & op,const std::vector<implT * > & v)2418 void multiop_values_doit(const keyT& key, const opT& op, const std::vector<implT*>& v) { 2419 std::vector<tensorT> c(v.size()); 2420 for (unsigned int i=0; i<v.size(); i++) { 2421 if (v[i]) { 2422 coeffT cc = coeffs2values(key, v[i]->coeffs.find(key).get()->second.coeff()); 2423 c[i]=cc.full_tensor(); 2424 } 2425 } 2426 tensorT r = op(key, c); 2427 coeffs.replace(key, nodeT(coeffT(values2coeffs(key, r),targs),false)); 2428 } 2429 2430 /// Inplace operate on many functions (impl's) with an operator within a certain box 2431 /// Assumes all functions have been refined down to the same level 2432 /// @param[in] op the operator 2433 /// @param[in] v the vector of function impl's on which to be operated 2434 template <typename opT> multiop_values(const opT & op,const std::vector<implT * > & v)2435 void multiop_values(const opT& op, const std::vector<implT*>& v) { 2436 // rough check on refinement level (ignore non-initialized functions 2437 for (std::size_t i=1; i<v.size(); ++i) { 2438 if (v[i] and v[i-1]) { 2439 MADNESS_ASSERT(v[i]->coeffs.size()==v[i-1]->coeffs.size()); 2440 } 2441 } 2442 typename dcT::iterator end = v[0]->coeffs.end(); 2443 for (typename dcT::iterator it=v[0]->coeffs.begin(); it!=end; ++it) { 2444 const keyT& key = it->first; 2445 if (it->second.has_coeff()) 2446 world.taskq.add(*this, &implT:: template multiop_values_doit<opT>, key, op, v); 2447 else 2448 coeffs.replace(key, nodeT(coeffT(),true)); 2449 } 2450 world.gop.fence(); 
2451 } 2452 2453 /// Inplace operate on many functions (impl's) with an operator within a certain box 2454 2455 /// @param[in] key the key of the current function node (box) 2456 /// @param[in] op the operator 2457 /// @param[in] vin the vector of function impl's on which to be operated 2458 /// @param[out] vout the resulting vector of function impl's 2459 template <typename opT> multi_to_multi_op_values_doit(const keyT & key,const opT & op,const std::vector<implT * > & vin,std::vector<implT * > & vout)2460 void multi_to_multi_op_values_doit(const keyT& key, const opT& op, 2461 const std::vector<implT*>& vin, std::vector<implT*>& vout) { 2462 std::vector<tensorT> c(vin.size()); 2463 for (unsigned int i=0; i<vin.size(); i++) { 2464 if (vin[i]) { 2465 coeffT cc = coeffs2values(key, vin[i]->coeffs.find(key).get()->second.coeff()); 2466 c[i]=cc.full_tensor(); 2467 } 2468 } 2469 std::vector<tensorT> r = op(key, c); 2470 MADNESS_ASSERT(r.size()==vout.size()); 2471 for (std::size_t i=0; i<vout.size(); ++i) { 2472 vout[i]->coeffs.replace(key, nodeT(coeffT(values2coeffs(key, r[i]),targs),false)); 2473 } 2474 } 2475 2476 /// Inplace operate on many functions (impl's) with an operator within a certain box 2477 2478 /// Assumes all functions have been refined down to the same level 2479 /// @param[in] op the operator 2480 /// @param[in] vin the vector of function impl's on which to be operated 2481 /// @param[out] vout the resulting vector of function impl's 2482 template <typename opT> 2483 void multi_to_multi_op_values(const opT& op, const std::vector<implT*>& vin, 2484 std::vector<implT*>& vout, const bool fence=true) { 2485 // rough check on refinement level (ignore non-initialized functions 2486 for (std::size_t i=1; i<vin.size(); ++i) { 2487 if (vin[i] and vin[i-1]) { 2488 MADNESS_ASSERT(vin[i]->coeffs.size()==vin[i-1]->coeffs.size()); 2489 } 2490 } 2491 typename dcT::iterator end = vin[0]->coeffs.end(); 2492 for (typename dcT::iterator it=vin[0]->coeffs.begin(); 
it!=end; ++it) { 2493 const keyT& key = it->first; 2494 if (it->second.has_coeff()) 2495 world.taskq.add(*this, &implT:: template multi_to_multi_op_values_doit<opT>, 2496 key, op, vin, vout); 2497 else { 2498 // fill result functions with empty box in this key 2499 for (implT* it2 : vout) { 2500 it2->coeffs.replace(key, nodeT(coeffT(),true)); 2501 } 2502 } 2503 } 2504 if (fence) world.gop.fence(); 2505 } 2506 2507 /// Transforms a vector of functions left[i] = sum[j] right[j]*c[j,i] using sparsity 2508 /// @param[in] vright vector of functions (impl's) on which to be transformed 2509 /// @param[in] c the tensor (matrix) transformer 2510 /// @param[in] vleft vector of of the *newly* transformed functions (impl's) 2511 template <typename Q, typename R> vtransform(const std::vector<std::shared_ptr<FunctionImpl<R,NDIM>>> & vright,const Tensor<Q> & c,const std::vector<std::shared_ptr<FunctionImpl<T,NDIM>>> & vleft,double tol,bool fence)2512 void vtransform(const std::vector< std::shared_ptr< FunctionImpl<R,NDIM> > >& vright, 2513 const Tensor<Q>& c, 2514 const std::vector< std::shared_ptr< FunctionImpl<T,NDIM> > >& vleft, 2515 double tol, 2516 bool fence) { 2517 for (unsigned int j=0; j<vright.size(); ++j) { 2518 world.taskq.add(*this, &implT:: template vtransform_doit<Q,R>, vright[j], copy(c(j,_)), vleft, tol); 2519 } 2520 if (fence) 2521 world.gop.fence(); 2522 } 2523 2524 /// Unary operation applied inplace to the values with optional refinement and fence 2525 /// @param[in] op the unary operator for the values 2526 template <typename opT> unary_op_value_inplace(const opT & op,bool fence)2527 void unary_op_value_inplace(const opT& op, bool fence) { 2528 typedef Range<typename dcT::iterator> rangeT; 2529 typedef do_unary_op_value_inplace<opT> xopT; 2530 world.taskq.for_each<rangeT,xopT>(rangeT(coeffs.begin(), coeffs.end()), xopT(this,op)); 2531 if (fence) 2532 world.gop.fence(); 2533 } 2534 2535 // Multiplication assuming same distribution and recursive descent 2536 
/// Both left and right functions are in the scaling function basis 2537 /// @param[in] key the key to the current function node (box) 2538 /// @param[in] left the function impl associated with the left function 2539 /// @param[in] lcin the scaling function coefficients associated with the 2540 /// current box in the left function 2541 /// @param[in] vrightin the vector of function impl's associated with 2542 /// the vector of right functions 2543 /// @param[in] vrcin the vector scaling function coefficients associated with the 2544 /// current box in the right functions 2545 /// @param[out] vresultin the vector of resulting functions (impl's) 2546 template <typename L, typename R> mulXXveca(const keyT & key,const FunctionImpl<L,NDIM> * left,const Tensor<L> & lcin,const std::vector<const FunctionImpl<R,NDIM> * > vrightin,const std::vector<Tensor<R>> & vrcin,const std::vector<FunctionImpl<T,NDIM> * > vresultin,double tol)2547 void mulXXveca(const keyT& key, 2548 const FunctionImpl<L,NDIM>* left, const Tensor<L>& lcin, 2549 const std::vector<const FunctionImpl<R,NDIM>*> vrightin, 2550 const std::vector< Tensor<R> >& vrcin, 2551 const std::vector<FunctionImpl<T,NDIM>*> vresultin, 2552 double tol) { 2553 typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT; 2554 typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator riterT; 2555 2556 double lnorm = 1e99; 2557 Tensor<L> lc = lcin; 2558 if (lc.size() == 0) { 2559 literT it = left->coeffs.find(key).get(); 2560 MADNESS_ASSERT(it != left->coeffs.end()); 2561 lnorm = it->second.get_norm_tree(); 2562 if (it->second.has_coeff()) 2563 lc = it->second.coeff().full_tensor_copy(); 2564 } 2565 2566 // Loop thru RHS functions seeing if anything can be multiplied 2567 std::vector<FunctionImpl<T,NDIM>*> vresult; 2568 std::vector<const FunctionImpl<R,NDIM>*> vright; 2569 std::vector< Tensor<R> > vrc; 2570 vresult.reserve(vrightin.size()); 2571 vright.reserve(vrightin.size()); 2572 vrc.reserve(vrightin.size()); 2573 
2574 for (unsigned int i=0; i<vrightin.size(); ++i) { 2575 FunctionImpl<T,NDIM>* result = vresultin[i]; 2576 const FunctionImpl<R,NDIM>* right = vrightin[i]; 2577 Tensor<R> rc = vrcin[i]; 2578 double rnorm; 2579 if (rc.size() == 0) { 2580 riterT it = right->coeffs.find(key).get(); 2581 MADNESS_ASSERT(it != right->coeffs.end()); 2582 rnorm = it->second.get_norm_tree(); 2583 if (it->second.has_coeff()) 2584 rc = it->second.coeff().full_tensor_copy(); 2585 } 2586 else { 2587 rnorm = rc.normf(); 2588 } 2589 2590 if (rc.size() && lc.size()) { // Yipee! 2591 result->task(world.rank(), &implT:: template do_mul<L,R>, key, lc, std::make_pair(key,rc)); 2592 } 2593 else if (tol && lnorm*rnorm < truncate_tol(tol, key)) { 2594 result->coeffs.replace(key, nodeT(coeffT(cdata.vk,targs),false)); // Zero leaf 2595 } 2596 else { // Interior node 2597 result->coeffs.replace(key, nodeT(coeffT(),true)); 2598 vresult.push_back(result); 2599 vright.push_back(right); 2600 vrc.push_back(rc); 2601 } 2602 } 2603 2604 if (vresult.size()) { 2605 Tensor<L> lss; 2606 if (lc.size()) { 2607 Tensor<L> ld(cdata.v2k); 2608 ld(cdata.s0) = lc(___); 2609 lss = left->unfilter(ld); 2610 } 2611 2612 std::vector< Tensor<R> > vrss(vresult.size()); 2613 for (unsigned int i=0; i<vresult.size(); ++i) { 2614 if (vrc[i].size()) { 2615 Tensor<R> rd(cdata.v2k); 2616 rd(cdata.s0) = vrc[i](___); 2617 vrss[i] = vright[i]->unfilter(rd); 2618 } 2619 } 2620 2621 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2622 const keyT& child = kit.key(); 2623 Tensor<L> ll; 2624 2625 std::vector<Slice> cp = child_patch(child); 2626 2627 if (lc.size()) 2628 ll = copy(lss(cp)); 2629 2630 std::vector< Tensor<R> > vv(vresult.size()); 2631 for (unsigned int i=0; i<vresult.size(); ++i) { 2632 if (vrc[i].size()) 2633 vv[i] = copy(vrss[i](cp)); 2634 } 2635 2636 woT::task(coeffs.owner(child), &implT:: template mulXXveca<L,R>, child, left, ll, vright, vv, vresult, tol); 2637 } 2638 } 2639 } 2640 2641 /// Multiplication using recursive 
descent and assuming same distribution 2642 /// Both left and right functions are in the scaling function basis 2643 /// @param[in] key the key to the current function node (box) 2644 /// @param[in] left the function impl associated with the left function 2645 /// @param[in] lcin the scaling function coefficients associated with the 2646 /// current box in the left function 2647 /// @param[in] right the function impl associated with the right function 2648 /// @param[in] rcin the scaling function coefficients associated with the 2649 /// current box in the right function 2650 template <typename L, typename R> mulXXa(const keyT & key,const FunctionImpl<L,NDIM> * left,const Tensor<L> & lcin,const FunctionImpl<R,NDIM> * right,const Tensor<R> & rcin,double tol)2651 void mulXXa(const keyT& key, 2652 const FunctionImpl<L,NDIM>* left, const Tensor<L>& lcin, 2653 const FunctionImpl<R,NDIM>* right,const Tensor<R>& rcin, 2654 double tol) { 2655 typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT; 2656 typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator riterT; 2657 2658 double lnorm=1e99, rnorm=1e99; 2659 2660 Tensor<L> lc = lcin; 2661 if (lc.size() == 0) { 2662 literT it = left->coeffs.find(key).get(); 2663 MADNESS_ASSERT(it != left->coeffs.end()); 2664 lnorm = it->second.get_norm_tree(); 2665 if (it->second.has_coeff()) 2666 lc = it->second.coeff().full_tensor_copy(); 2667 } 2668 2669 Tensor<R> rc = rcin; 2670 if (rc.size() == 0) { 2671 riterT it = right->coeffs.find(key).get(); 2672 MADNESS_ASSERT(it != right->coeffs.end()); 2673 rnorm = it->second.get_norm_tree(); 2674 if (it->second.has_coeff()) 2675 rc = it->second.coeff().full_tensor_copy(); 2676 } 2677 2678 // both nodes are leaf nodes: multiply and return 2679 if (rc.size() && lc.size()) { // Yipee! 
2680 do_mul<L,R>(key, lc, std::make_pair(key,rc)); 2681 return; 2682 } 2683 2684 if (tol) { 2685 if (lc.size()) 2686 lnorm = lc.normf(); // Otherwise got from norm tree above 2687 if (rc.size()) 2688 rnorm = rc.normf(); 2689 if (lnorm*rnorm < truncate_tol(tol, key)) { 2690 coeffs.replace(key, nodeT(coeffT(cdata.vk,targs),false)); // Zero leaf node 2691 return; 2692 } 2693 } 2694 2695 // Recur down 2696 coeffs.replace(key, nodeT(coeffT(),true)); // Interior node 2697 2698 Tensor<L> lss; 2699 if (lc.size()) { 2700 Tensor<L> ld(cdata.v2k); 2701 ld(cdata.s0) = lc(___); 2702 lss = left->unfilter(ld); 2703 } 2704 2705 Tensor<R> rss; 2706 if (rc.size()) { 2707 Tensor<R> rd(cdata.v2k); 2708 rd(cdata.s0) = rc(___); 2709 rss = right->unfilter(rd); 2710 } 2711 2712 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2713 const keyT& child = kit.key(); 2714 Tensor<L> ll; 2715 Tensor<R> rr; 2716 if (lc.size()) 2717 ll = copy(lss(child_patch(child))); 2718 if (rc.size()) 2719 rr = copy(rss(child_patch(child))); 2720 2721 woT::task(coeffs.owner(child), &implT:: template mulXXa<L,R>, child, left, ll, right, rr, tol); 2722 } 2723 } 2724 2725 2726 // Binary operation on values using recursive descent and assuming same distribution 2727 /// Both left and right functions are in the scaling function basis 2728 /// @param[in] key the key to the current function node (box) 2729 /// @param[in] left the function impl associated with the left function 2730 /// @param[in] lcin the scaling function coefficients associated with the 2731 /// current box in the left function 2732 /// @param[in] right the function impl associated with the right function 2733 /// @param[in] rcin the scaling function coefficients associated with the 2734 /// current box in the right function 2735 /// @param[in] op the binary operator 2736 template <typename L, typename R, typename opT> binaryXXa(const keyT & key,const FunctionImpl<L,NDIM> * left,const Tensor<L> & lcin,const FunctionImpl<R,NDIM> * right,const 
Tensor<R> & rcin,const opT & op)2737 void binaryXXa(const keyT& key, 2738 const FunctionImpl<L,NDIM>* left, const Tensor<L>& lcin, 2739 const FunctionImpl<R,NDIM>* right,const Tensor<R>& rcin, 2740 const opT& op) { 2741 typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT; 2742 typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator riterT; 2743 2744 Tensor<L> lc = lcin; 2745 if (lc.size() == 0) { 2746 literT it = left->coeffs.find(key).get(); 2747 MADNESS_ASSERT(it != left->coeffs.end()); 2748 if (it->second.has_coeff()) 2749 lc = it->second.coeff().full_tensor_copy(); 2750 } 2751 2752 Tensor<R> rc = rcin; 2753 if (rc.size() == 0) { 2754 riterT it = right->coeffs.find(key).get(); 2755 MADNESS_ASSERT(it != right->coeffs.end()); 2756 if (it->second.has_coeff()) 2757 rc = it->second.coeff().full_tensor_copy(); 2758 } 2759 2760 if (rc.size() && lc.size()) { // Yipee! 2761 do_binary_op<L,R>(key, lc, std::make_pair(key,rc), op); 2762 return; 2763 } 2764 2765 // Recur down 2766 coeffs.replace(key, nodeT(coeffT(),true)); // Interior node 2767 2768 Tensor<L> lss; 2769 if (lc.size()) { 2770 Tensor<L> ld(cdata.v2k); 2771 ld(cdata.s0) = lc(___); 2772 lss = left->unfilter(ld); 2773 } 2774 2775 Tensor<R> rss; 2776 if (rc.size()) { 2777 Tensor<R> rd(cdata.v2k); 2778 rd(cdata.s0) = rc(___); 2779 rss = right->unfilter(rd); 2780 } 2781 2782 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2783 const keyT& child = kit.key(); 2784 Tensor<L> ll; 2785 Tensor<R> rr; 2786 if (lc.size()) 2787 ll = copy(lss(child_patch(child))); 2788 if (rc.size()) 2789 rr = copy(rss(child_patch(child))); 2790 2791 woT::task(coeffs.owner(child), &implT:: template binaryXXa<L,R,opT>, child, left, ll, right, rr, op); 2792 } 2793 } 2794 2795 template <typename Q, typename opT> 2796 struct coeff_value_adaptor { 2797 typedef typename opT::resultT resultT; 2798 const FunctionImpl<Q,NDIM>* impl_func; 2799 opT op; 2800 coeff_value_adaptorcoeff_value_adaptor2801 coeff_value_adaptor() {}; 
coeff_value_adaptorcoeff_value_adaptor2802 coeff_value_adaptor(const FunctionImpl<Q,NDIM>* impl_func, 2803 const opT& op) 2804 : impl_func(impl_func), op(op) {} 2805 operatorcoeff_value_adaptor2806 Tensor<resultT> operator()(const Key<NDIM>& key, const Tensor<Q>& t) const { 2807 Tensor<Q> invalues = impl_func->coeffs2values(key, t); 2808 2809 Tensor<resultT> outvalues = op(key, invalues); 2810 2811 return impl_func->values2coeffs(key, outvalues); 2812 } 2813 2814 template <typename Archive> serializecoeff_value_adaptor2815 void serialize(Archive& ar) { 2816 ar & impl_func & op; 2817 } 2818 }; 2819 2820 /// Out of place unary operation on function impl 2821 /// The skeleton algorithm should resemble something like 2822 /// 2823 /// *this = op(*func) 2824 /// 2825 /// @param[in] key the key of the current function node (box) 2826 /// @param[in] func the function impl on which to be operated 2827 /// @param[in] op the unary operator 2828 template <typename Q, typename opT> unaryXXa(const keyT & key,const FunctionImpl<Q,NDIM> * func,const opT & op)2829 void unaryXXa(const keyT& key, 2830 const FunctionImpl<Q,NDIM>* func, const opT& op) { 2831 2832 // const Tensor<Q>& fc = func->coeffs.find(key).get()->second.full_tensor_copy(); 2833 const Tensor<Q> fc = func->coeffs.find(key).get()->second.coeff().full_tensor_copy(); 2834 2835 if (fc.size() == 0) { 2836 // Recur down 2837 coeffs.replace(key, nodeT(coeffT(),true)); // Interior node 2838 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2839 const keyT& child = kit.key(); 2840 woT::task(coeffs.owner(child), &implT:: template unaryXXa<Q,opT>, child, func, op); 2841 } 2842 } 2843 else { 2844 tensorT t=op(key,fc); 2845 coeffs.replace(key, nodeT(coeffT(t,targs),false)); // Leaf node 2846 } 2847 } 2848 2849 /// Multiplies two functions (impl's) together. 
Delegates to the mulXXa() method 2850 /// @param[in] left pointer to the left function impl 2851 /// @param[in] right pointer to the right function impl 2852 /// @param[in] tol numerical tolerance 2853 template <typename L, typename R> mulXX(const FunctionImpl<L,NDIM> * left,const FunctionImpl<R,NDIM> * right,double tol,bool fence)2854 void mulXX(const FunctionImpl<L,NDIM>* left, const FunctionImpl<R,NDIM>* right, double tol, bool fence) { 2855 if (world.rank() == coeffs.owner(cdata.key0)) 2856 mulXXa(cdata.key0, left, Tensor<L>(), right, Tensor<R>(), tol); 2857 if (fence) 2858 world.gop.fence(); 2859 2860 //verify_tree(); 2861 } 2862 2863 /// Performs binary operation on two functions (impl's). Delegates to the binaryXXa() method 2864 /// @param[in] left pointer to the left function impl 2865 /// @param[in] right pointer to the right function impl 2866 /// @param[in] op the binary operator 2867 template <typename L, typename R, typename opT> binaryXX(const FunctionImpl<L,NDIM> * left,const FunctionImpl<R,NDIM> * right,const opT & op,bool fence)2868 void binaryXX(const FunctionImpl<L,NDIM>* left, const FunctionImpl<R,NDIM>* right, 2869 const opT& op, bool fence) { 2870 if (world.rank() == coeffs.owner(cdata.key0)) 2871 binaryXXa(cdata.key0, left, Tensor<L>(), right, Tensor<R>(), op); 2872 if (fence) 2873 world.gop.fence(); 2874 2875 //verify_tree(); 2876 } 2877 2878 /// Performs unary operation on function impl. Delegates to the unaryXXa() method 2879 /// @param[in] func function impl of the operand 2880 /// @param[in] op the unary operator 2881 template <typename Q, typename opT> unaryXX(const FunctionImpl<Q,NDIM> * func,const opT & op,bool fence)2882 void unaryXX(const FunctionImpl<Q,NDIM>* func, const opT& op, bool fence) { 2883 if (world.rank() == coeffs.owner(cdata.key0)) 2884 unaryXXa(cdata.key0, func, op); 2885 if (fence) 2886 world.gop.fence(); 2887 2888 //verify_tree(); 2889 } 2890 2891 /// Performs unary operation on function impl. 
Delegates to the unaryXXa() method 2892 /// @param[in] func function impl of the operand 2893 /// @param[in] op the unary operator 2894 template <typename Q, typename opT> unaryXXvalues(const FunctionImpl<Q,NDIM> * func,const opT & op,bool fence)2895 void unaryXXvalues(const FunctionImpl<Q,NDIM>* func, const opT& op, bool fence) { 2896 if (world.rank() == coeffs.owner(cdata.key0)) 2897 unaryXXa(cdata.key0, func, coeff_value_adaptor<Q,opT>(func,op)); 2898 if (fence) 2899 world.gop.fence(); 2900 2901 //verify_tree(); 2902 } 2903 2904 /// Multiplies a function (impl) with a vector of functions (impl's). Delegates to the 2905 /// mulXXveca() method. 2906 /// @param[in] left pointer to the left function impl 2907 /// @param[in] vright vector of pointers to the right function impl's 2908 /// @param[in] tol numerical tolerance 2909 /// @param[out] vresult vector of pointers to the resulting function impl's 2910 template <typename L, typename R> mulXXvec(const FunctionImpl<L,NDIM> * left,const std::vector<const FunctionImpl<R,NDIM> * > & vright,const std::vector<FunctionImpl<T,NDIM> * > & vresult,double tol,bool fence)2911 void mulXXvec(const FunctionImpl<L,NDIM>* left, 2912 const std::vector<const FunctionImpl<R,NDIM>*>& vright, 2913 const std::vector<FunctionImpl<T,NDIM>*>& vresult, 2914 double tol, 2915 bool fence) { 2916 std::vector< Tensor<R> > vr(vright.size()); 2917 if (world.rank() == coeffs.owner(cdata.key0)) 2918 mulXXveca(cdata.key0, left, Tensor<L>(), vright, vr, vresult, tol); 2919 if (fence) 2920 world.gop.fence(); 2921 } 2922 2923 Future<double> get_norm_tree_recursive(const keyT& key) const; 2924 2925 mutable long box_leaf[1000]; 2926 mutable long box_interior[1000]; 2927 2928 // horrifically non-scalable 2929 void put_in_box(ProcessID from, long nl, long ni) const; 2930 2931 /// Prints summary of data distribution 2932 void print_info() const; 2933 2934 /// Verify tree is properly constructed ... 
global synchronization involved 2935 2936 /// If an inconsistency is detected, prints a message describing the error and 2937 /// then throws a madness exception. 2938 /// 2939 /// This is a reasonably quick and scalable operation that is 2940 /// useful for debugging and paranoia. 2941 void verify_tree() const; 2942 2943 /// Walk up the tree returning pair(key,node) for first node with coefficients 2944 2945 /// Three possibilities. 2946 /// 2947 /// 1) The coeffs are present and returned with the key of the containing node. 2948 /// 2949 /// 2) The coeffs are further up the tree ... the request is forwarded up. 2950 /// 2951 /// 3) The coeffs are further down the tree ... an empty tensor is returned. 2952 /// 2953 /// !! This routine is crying out for an optimization to 2954 /// manage the number of messages being sent ... presently 2955 /// each parent is fetched 2^(n*d) times where n is the no. of 2956 /// levels between the level of evaluation and the parent. 2957 /// Alternatively, reimplement multiply as a downward tree 2958 /// walk and just pass the parent down. Slightly less 2959 /// parallelism but much less communication. 2960 /// @todo Robert .... help! 2961 void sock_it_to_me(const keyT& key, 2962 const RemoteReference< FutureImpl< std::pair<keyT,coeffT> > >& ref) const; 2963 /// As above, except 2964 /// 3) The coeffs are constructed from the avg of nodes further down the tree 2965 /// @todo Robert .... help! 2966 void sock_it_to_me_too(const keyT& key, 2967 const RemoteReference< FutureImpl< std::pair<keyT,coeffT> > >& ref) const; 2968 2969 /// @todo help! 2970 void plot_cube_kernel(archive::archive_ptr< Tensor<T> > ptr, 2971 const keyT& key, 2972 const coordT& plotlo, const coordT& plothi, const std::vector<long>& npt, 2973 bool eval_refine) const; 2974 2975 2976 /// Evaluate a cube/slice of points ...
plotlo and plothi are already in simulation coordinates 2977 /// No communications 2978 /// @param[in] plotlo the coordinate of the starting point 2979 /// @param[in] plothi the coordinate of the ending point 2980 /// @param[in] npt the number of points in each dimension 2981 Tensor<T> eval_plot_cube(const coordT& plotlo, 2982 const coordT& plothi, 2983 const std::vector<long>& npt, 2984 const bool eval_refine = false) const; 2985 2986 2987 /// Evaluate function only if point is local returning (true,value); otherwise return (false,0.0) 2988 2989 /// maxlevel is the maximum depth to search down to --- the max local depth can be 2990 /// computed with max_local_depth(); 2991 std::pair<bool,T> eval_local_only(const Vector<double,NDIM>& xin, Level maxlevel) ; 2992 2993 2994 /// Evaluate the function at a point in \em simulation coordinates 2995 2996 /// Only the invoking process will get the result via the 2997 /// remote reference to a future. Active messages may be sent 2998 /// to other nodes. 2999 void eval(const Vector<double,NDIM>& xin, 3000 const keyT& keyin, 3001 const typename Future<T>::remote_refT& ref); 3002 3003 /// Get the depth of the tree at a point in \em simulation coordinates 3004 3005 /// Only the invoking process will get the result via the 3006 /// remote reference to a future. Active messages may be sent 3007 /// to other nodes. 3008 /// 3009 /// This function is a minimally-modified version of eval() 3010 void evaldepthpt(const Vector<double,NDIM>& xin, 3011 const keyT& keyin, 3012 const typename Future<Level>::remote_refT& ref); 3013 3014 /// Get the rank of leaf box of the tree at a point in \em simulation coordinates 3015 3016 /// Only the invoking process will get the result via the 3017 /// remote reference to a future. Active messages may be sent 3018 /// to other nodes. 
3019 /// 3020 /// This function is a minimally-modified version of eval() 3021 void evalR(const Vector<double,NDIM>& xin, 3022 const keyT& keyin, 3023 const typename Future<long>::remote_refT& ref); 3024 3025 3026 /// Computes norm of low/high-order polyn. coeffs for autorefinement test 3027 3028 /// t is a k^d tensor. In order to screen the autorefinement 3029 /// during multiplication compute the norms of 3030 /// ... lo ... the block of t for all polynomials of order < k/2 3031 /// ... hi ... the block of t for all polynomials of order >= k/2 3032 /// 3033 /// k=5 0,1,2,3,4 --> 0,1,2 ... 3,4 3034 /// k=6 0,1,2,3,4,5 --> 0,1,2 ... 3,4,5 3035 /// 3036 /// k=number of wavelets, so k=5 means max order is 4, so max exactly 3037 /// representable squarable polynomial is of order 2. 3038 void tnorm(const tensorT& t, double* lo, double* hi) const; 3039 3040 // This invoked if node has not been autorefined 3041 void do_square_inplace(const keyT& key); 3042 3043 // This invoked if node has been autorefined 3044 void do_square_inplace2(const keyT& parent, const keyT& child, const tensorT& parent_coeff); 3045 3046 /// Always returns false (for when autorefine is not wanted) 3047 bool noautorefine(const keyT& key, const tensorT& t) const; 3048 3049 /// Returns true if this block of coeffs needs autorefining 3050 bool autorefine_square_test(const keyT& key, const nodeT& t) const; 3051 3052 /// Pointwise squaring of function with optional global fence 3053 3054 /// If not autorefining, local computation only if not fencing. 3055 /// If autorefining, may result in asynchronous communication. 3056 void square_inplace(bool fence); 3057 void abs_inplace(bool fence); 3058 void abs_square_inplace(bool fence); 3059 3060 /// is this the same as trickle_down() ? 
3061 void sum_down_spawn(const keyT& key, const coeffT& s); 3062 3063 /// After 1d push operator must sum coeffs down the tree to restore correct scaling function coefficients 3064 void sum_down(bool fence); 3065 3066 /// perform this multiplication: h(1,2) = f(1,2) * g(1) 3067 template<size_t LDIM> 3068 struct multiply_op { 3069 randomizemultiply_op3070 static bool randomize() {return false;} 3071 typedef CoeffTracker<T,NDIM> ctT; 3072 typedef CoeffTracker<T,LDIM> ctL; 3073 typedef multiply_op<LDIM> this_type; 3074 3075 implT* h; ///< the result function h(1,2) = f(1,2) * g(1) 3076 ctT f; 3077 ctL g; 3078 int particle; ///< if g is g(1) or g(2) 3079 multiply_opmultiply_op3080 multiply_op() : particle(1) {} 3081 multiply_opmultiply_op3082 multiply_op(implT* h, const ctT& f, const ctL& g, const int particle) 3083 : h(h), f(f), g(g), particle(particle) {}; 3084 3085 /// return true if this will be a leaf node 3086 3087 /// use generalization of tnorm for a GenTensor screenmultiply_op3088 bool screen(const coeffT& fcoeff, const coeffT& gcoeff, const keyT& key) const { 3089 double glo=0.0, ghi=0.0, flo=0.0, fhi=0.0; 3090 MADNESS_ASSERT(gcoeff.tensor_type()==TT_FULL); 3091 g.get_impl()->tnorm(gcoeff.full_tensor(), &glo, &ghi); 3092 3093 // this assumes intimate knowledge of how a GenTensor is organized! 
3094 MADNESS_ASSERT(fcoeff.tensor_type()==TT_2D); 3095 const long rank=fcoeff.rank(); 3096 const long maxk=fcoeff.dim(0); 3097 tensorT vec=fcoeff.config().ref_vector(particle-1).reshape(rank,maxk,maxk,maxk); 3098 for (long i=0; i<rank; ++i) { 3099 double lo,hi; 3100 tensorT c=vec(Slice(i,i),_,_,_).reshape(maxk,maxk,maxk); 3101 g.get_impl()->tnorm(c, &lo, &hi); // note we use g instead of h, since g is 3D 3102 flo+=lo*fcoeff.config().weights(i); 3103 fhi+=hi*fcoeff.config().weights(i); 3104 } 3105 double total_hi=glo*fhi + ghi*flo + fhi*ghi; 3106 return (total_hi<h->truncate_tol(h->get_thresh(),key)); 3107 3108 } 3109 3110 /// apply this on a FunctionNode of f and g of Key key 3111 3112 /// @param[in] key key for FunctionNode in f and g, (g: broken into particles) 3113 /// @return <this node is a leaf, coefficients of this node> operatormultiply_op3114 std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const { 3115 3116 // bool is_leaf=(not fdatum.second.has_children()); 3117 // if (not is_leaf) return std::pair<bool,coeffT> (is_leaf,coeffT()); 3118 3119 // break key into particles (these are the child keys, with f/gdatum come the parent keys) 3120 Key<LDIM> key1,key2; 3121 key.break_apart(key1,key2); 3122 const Key<LDIM> gkey= (particle==1) ? 
key1 : key2; 3123 3124 // get coefficients of the actual FunctionNode 3125 coeffT coeff1=f.get_impl()->parent_to_child(f.coeff(),f.key(),key); 3126 coeff1.normalize(); 3127 const coeffT coeff2=g.get_impl()->parent_to_child(g.coeff(),g.key(),gkey); 3128 3129 // multiplication is done in TT_2D 3130 coeffT coeff1_2D=coeff1.convert(TensorArgs(h->get_thresh(),TT_2D)); 3131 coeff1_2D.normalize(); 3132 3133 bool is_leaf=screen(coeff1_2D,coeff2,key); 3134 if (key.level()<2) is_leaf=false; 3135 3136 coeffT hcoeff; 3137 if (is_leaf) { 3138 3139 // convert coefficients to values 3140 coeffT hvalues=f.get_impl()->coeffs2values(key,coeff1_2D); 3141 coeffT gvalues=g.get_impl()->coeffs2values(gkey,coeff2); 3142 3143 // perform multiplication 3144 coeffT result_val=h->multiply(hvalues,gvalues,particle-1); 3145 3146 hcoeff=h->values2coeffs(key,result_val); 3147 3148 // conversion on coeffs, not on values, because it implies truncation! 3149 if (hcoeff.tensor_type()!=h->get_tensor_type()) 3150 hcoeff=hcoeff.convert(h->get_tensor_args()); 3151 } 3152 3153 return std::pair<bool,coeffT> (is_leaf,hcoeff); 3154 } 3155 make_childmultiply_op3156 this_type make_child(const keyT& child) const { 3157 3158 // break key into particles 3159 Key<LDIM> key1, key2; 3160 child.break_apart(key1,key2); 3161 const Key<LDIM> gkey= (particle==1) ? 
key1 : key2; 3162 3163 return this_type(h,f.make_child(child),g.make_child(gkey),particle); 3164 } 3165 activatemultiply_op3166 Future<this_type> activate() const { 3167 Future<ctT> f1=f.activate(); 3168 Future<ctL> g1=g.activate(); 3169 return h->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this), 3170 &this_type::forward_ctor),h,f1,g1,particle); 3171 } 3172 forward_ctormultiply_op3173 this_type forward_ctor(implT* h1, const ctT& f1, const ctL& g1, const int particle) { 3174 return this_type(h1,f1,g1,particle); 3175 } 3176 serializemultiply_op3177 template <typename Archive> void serialize(const Archive& ar) { 3178 ar & h & f & g; 3179 } 3180 }; 3181 3182 3183 /// add two functions f and g: result=alpha * f + beta * g 3184 struct add_op { 3185 3186 typedef CoeffTracker<T,NDIM> ctT; 3187 typedef add_op this_type; 3188 randomizeadd_op3189 bool randomize() const {return false;} 3190 3191 /// tracking coeffs of first and second addend 3192 ctT f,g; 3193 /// prefactor for f, g 3194 double alpha, beta; 3195 add_opadd_op3196 add_op() {}; add_opadd_op3197 add_op(const ctT& f, const ctT& g, const double alpha, const double beta) 3198 : f(f), g(g), alpha(alpha), beta(beta){} 3199 3200 /// if we are at the bottom of the trees, return the sum of the coeffs operatoradd_op3201 std::pair<bool,coeffT> operator()(const keyT& key) const { 3202 3203 bool is_leaf=(f.is_leaf() and g.is_leaf()); 3204 if (not is_leaf) return std::pair<bool,coeffT> (is_leaf,coeffT()); 3205 3206 coeffT fcoeff=f.get_impl()->parent_to_child(f.coeff(),f.key(),key); 3207 coeffT gcoeff=g.get_impl()->parent_to_child(g.coeff(),g.key(),key); 3208 coeffT hcoeff=copy(fcoeff); 3209 hcoeff.gaxpy(alpha,gcoeff,beta); 3210 hcoeff.reduce_rank(f.get_impl()->get_tensor_args().thresh); 3211 return std::pair<bool,coeffT> (is_leaf,hcoeff); 3212 } 3213 make_childadd_op3214 this_type make_child(const keyT& child) const { 3215 return this_type(f.make_child(child),g.make_child(child),alpha,beta); 3216 } 3217 3218 
/// retrieve the coefficients (parent coeffs might be remote) activateadd_op3219 Future<this_type> activate() const { 3220 Future<ctT> f1=f.activate(); 3221 Future<ctT> g1=g.activate(); 3222 return f.get_impl()->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this), 3223 &this_type::forward_ctor),f1,g1,alpha,beta); 3224 } 3225 3226 /// taskq-compatible ctor forward_ctoradd_op3227 this_type forward_ctor(const ctT& f1, const ctT& g1, const double alpha, const double beta) { 3228 return this_type(f1,g1,alpha,beta); 3229 } 3230 serializeadd_op3231 template <typename Archive> void serialize(const Archive& ar) { 3232 ar & f & g & alpha & beta; 3233 } 3234 3235 }; 3236 3237 /// multiply f (a pair function of NDIM) with an orbital g (LDIM=NDIM/2) 3238 3239 /// as in (with h(1,2)=*this) : h(1,2) = g(1) * f(1,2) 3240 /// use tnorm as a measure to determine if f (=*this) must be refined 3241 /// @param[in] f the NDIM function f=f(1,2) 3242 /// @param[in] g the LDIM function g(1) (or g(2)) 3243 /// @param[in] particle 1 or 2, as in g(1) or g(2) 3244 template<size_t LDIM> multiply(const implT * f,const FunctionImpl<T,LDIM> * g,const int particle)3245 void multiply(const implT* f, const FunctionImpl<T,LDIM>* g, const int particle) { 3246 3247 keyT key0=f->cdata.key0; 3248 3249 if (world.rank() == coeffs.owner(key0)) { 3250 3251 CoeffTracker<T,NDIM> ff(f); 3252 CoeffTracker<T,LDIM> gg(g); 3253 3254 typedef multiply_op<LDIM> coeff_opT; 3255 coeff_opT coeff_op(this,ff,gg,particle); 3256 3257 typedef insert_op<T,NDIM> apply_opT; 3258 apply_opT apply_op(this); 3259 3260 ProcessID p= coeffs.owner(key0); 3261 woT::task(p, &implT:: template forward_traverse<coeff_opT,apply_opT>, coeff_op, apply_op, key0); 3262 } 3263 3264 this->compressed=false; 3265 } 3266 3267 /// Hartree product of two LDIM functions to yield a NDIM = 2*LDIM function 3268 template<size_t LDIM, typename leaf_opT> 3269 struct hartree_op { randomizehartree_op3270 bool randomize() const {return false;} 3271 
3272 typedef hartree_op<LDIM,leaf_opT> this_type; 3273 typedef CoeffTracker<T,LDIM> ctL; 3274 3275 implT* result; ///< where to construct the pair function 3276 ctL p1, p2; ///< tracking coeffs of the two lo-dim functions 3277 leaf_opT leaf_op; ///< determine if a given node will be a leaf node 3278 3279 // ctor hartree_ophartree_op3280 hartree_op() {} hartree_ophartree_op3281 hartree_op(implT* result, const ctL& p11, const ctL& p22, const leaf_opT& leaf_op) 3282 : result(result), p1(p11), p2(p22), leaf_op(leaf_op) { 3283 MADNESS_ASSERT(LDIM+LDIM==NDIM); 3284 } 3285 operatorhartree_op3286 std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const { 3287 3288 // break key into particles (these are the child keys, with datum1/2 come the parent keys) 3289 Key<LDIM> key1,key2; 3290 key.break_apart(key1,key2); 3291 3292 // this returns the appropriate NS coeffs for key1 and key2 resp. 3293 const coeffT fcoeff=p1.coeff(key1); 3294 const coeffT gcoeff=p2.coeff(key2); 3295 bool is_leaf=leaf_op(key,fcoeff.full_tensor(),gcoeff.full_tensor()); 3296 if (not is_leaf) return std::pair<bool,coeffT> (is_leaf,coeffT()); 3297 3298 // extract the sum coeffs from the NS coeffs 3299 const coeffT s1=fcoeff(p1.get_impl()->cdata.s0); 3300 const coeffT s2=gcoeff(p2.get_impl()->cdata.s0); 3301 3302 // new coeffs are simply the hartree/kronecker/outer product -- 3303 coeffT coeff=outer(s1,s2,result->get_tensor_args()); 3304 // no post-determination 3305 // is_leaf=leaf_op(key,coeff); 3306 return std::pair<bool,coeffT>(is_leaf,coeff); 3307 } 3308 make_childhartree_op3309 this_type make_child(const keyT& child) const { 3310 3311 // break key into particles 3312 Key<LDIM> key1, key2; 3313 child.break_apart(key1,key2); 3314 3315 return this_type(result,p1.make_child(key1),p2.make_child(key2),leaf_op); 3316 } 3317 activatehartree_op3318 Future<this_type> activate() const { 3319 Future<ctL> p11=p1.activate(); 3320 Future<ctL> p22=p2.activate(); 3321 return 
result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this), 3322 &this_type::forward_ctor),result,p11,p22,leaf_op); 3323 } 3324 forward_ctorhartree_op3325 this_type forward_ctor(implT* result1, const ctL& p11, const ctL& p22, const leaf_opT& leaf_op) { 3326 return this_type(result1,p11,p22,leaf_op); 3327 } 3328 serializehartree_op3329 template <typename Archive> void serialize(const Archive& ar) { 3330 ar & result & p1 & p2 & leaf_op; 3331 } 3332 }; 3333 3334 /// traverse a non-existing tree 3335 3336 /// part II: activate coeff_op, i.e. retrieve all the necessary remote boxes (communication) 3337 /// @param[in] coeff_op operator making the coefficients that needs activation 3338 /// @param[in] apply_op just passing thru 3339 /// @param[in] key the key we are working on 3340 template<typename coeff_opT, typename apply_opT> forward_traverse(const coeff_opT & coeff_op,const apply_opT & apply_op,const keyT & key)3341 void forward_traverse(const coeff_opT& coeff_op, const apply_opT& apply_op, const keyT& key) const { 3342 MADNESS_ASSERT(coeffs.is_local(key)); 3343 Future<coeff_opT> active_coeff=coeff_op.activate(); 3344 woT::task(world.rank(), &implT:: template traverse_tree<coeff_opT,apply_opT>, active_coeff, apply_op, key); 3345 } 3346 3347 3348 /// traverse a non-existing tree 3349 3350 /// part I: make the coefficients, process them and continue the recursion if necessary 3351 /// @param[in] coeff_op operator making the coefficients and determining them being leaves 3352 /// @param[in] apply_op operator processing the coefficients 3353 /// @param[in] key the key we are currently working on 3354 template<typename coeff_opT, typename apply_opT> traverse_tree(const coeff_opT & coeff_op,const apply_opT & apply_op,const keyT & key)3355 void traverse_tree(const coeff_opT& coeff_op, const apply_opT& apply_op, const keyT& key) const { 3356 MADNESS_ASSERT(coeffs.is_local(key)); 3357 3358 typedef typename std::pair<bool,coeffT> argT; 3359 const argT 
arg=coeff_op(key); 3360 apply_op.operator()(key,arg.second,arg.first); 3361 3362 const bool has_children=(not arg.first); 3363 if (has_children) { 3364 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 3365 const keyT& child=kit.key(); 3366 coeff_opT child_op=coeff_op.make_child(child); 3367 // spawn activation where child is local 3368 ProcessID p=coeffs.owner(child); 3369 3370 void (implT::*ft)(const coeff_opT&, const apply_opT&, const keyT&) const = &implT::forward_traverse<coeff_opT,apply_opT>; 3371 3372 woT::task(p, ft, child_op, apply_op, child); 3373 } 3374 } 3375 } 3376 3377 3378 /// given two functions of LDIM, perform the Hartree/Kronecker/outer product 3379 3380 /// |Phi(1,2)> = |phi(1)> x |phi(2)> 3381 /// @param[in] p1 FunctionImpl of particle 1 3382 /// @param[in] p2 FunctionImpl of particle 2 3383 /// @param[in] leaf_op operator determining of a given box will be a leaf 3384 template<std::size_t LDIM, typename leaf_opT> hartree_product(const FunctionImpl<T,LDIM> * p1,const FunctionImpl<T,LDIM> * p2,const leaf_opT & leaf_op,bool fence)3385 void hartree_product(const FunctionImpl<T,LDIM>* p1, const FunctionImpl<T,LDIM>* p2, 3386 const leaf_opT& leaf_op, bool fence) { 3387 MADNESS_ASSERT(p1->is_nonstandard()); 3388 MADNESS_ASSERT(p2->is_nonstandard()); 3389 3390 const keyT key0=cdata.key0; 3391 3392 if (world.rank() == this->get_coeffs().owner(key0)) { 3393 3394 // prepare the CoeffTracker 3395 CoeffTracker<T,LDIM> iap1(p1); 3396 CoeffTracker<T,LDIM> iap2(p2); 3397 3398 // the operator making the coefficients 3399 typedef hartree_op<LDIM,leaf_opT> coeff_opT; 3400 coeff_opT coeff_op(this,iap1,iap2,leaf_op); 3401 3402 // this operator simply inserts the coeffs into this' tree 3403 typedef insert_op<T,NDIM> apply_opT; 3404 apply_opT apply_op(this); 3405 3406 woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>, 3407 coeff_op, apply_op, cdata.key0); 3408 3409 } 3410 3411 this->compressed=false; 3412 if (fence) world.gop.fence(); 
3413 } 3414 3415 3416 template <typename opT, typename R> 3417 void apply_1d_realspace_push_op(const archive::archive_ptr<const opT> & pop,int axis,const keyT & key,const Tensor<R> & c)3418 apply_1d_realspace_push_op(const archive::archive_ptr<const opT>& pop, int axis, const keyT& key, const Tensor<R>& c) { 3419 const opT* op = pop.ptr; 3420 const Level n = key.level(); 3421 const double cnorm = c.normf(); 3422 const double tol = truncate_tol(thresh, key)*0.1; // ??? why this value???? 3423 3424 Vector<Translation,NDIM> lnew(key.translation()); 3425 const Translation lold = lnew[axis]; 3426 const Translation maxs = Translation(1)<<n; 3427 3428 int nsmall = 0; // Counts neglected blocks to terminate s loop 3429 for (Translation s=0; s<maxs; ++s) { 3430 int maxdir = s ? 1 : -1; 3431 for (int direction=-1; direction<=maxdir; direction+=2) { 3432 lnew[axis] = lold + direction*s; 3433 if (lnew[axis] >= 0 && lnew[axis] < maxs) { // NON-ZERO BOUNDARY CONDITIONS IGNORED HERE !!!!!!!!!!!!!!!!!!!! 3434 const Tensor<typename opT::opT>& r = op->rnlij(n, s*direction, true); 3435 double Rnorm = r.normf(); 3436 3437 if (Rnorm == 0.0) { 3438 return; // Hard zero means finished! 
3439 } 3440 3441 if (s <= 1 || r.normf()*cnorm > tol) { // Always do kernel and neighbor 3442 nsmall = 0; 3443 tensorT result = transform_dir(c,r,axis); 3444 3445 if (result.normf() > tol*0.3) { 3446 Key<NDIM> dest(n,lnew); 3447 coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest, TaskAttributes::hipri()); 3448 } 3449 } 3450 else { 3451 ++nsmall; 3452 } 3453 } 3454 else { 3455 ++nsmall; 3456 } 3457 } 3458 if (nsmall >= 4) { 3459 // If have two negligble blocks in 3460 // succession in each direction interpret 3461 // this as the operator being zero beyond 3462 break; 3463 } 3464 } 3465 } 3466 3467 template <typename opT, typename R> 3468 void apply_1d_realspace_push(const opT & op,const FunctionImpl<R,NDIM> * f,int axis,bool fence)3469 apply_1d_realspace_push(const opT& op, const FunctionImpl<R,NDIM>* f, int axis, bool fence) { 3470 MADNESS_ASSERT(!f->is_compressed()); 3471 3472 typedef typename FunctionImpl<R,NDIM>::dcT::const_iterator fiterT; 3473 typedef FunctionNode<R,NDIM> fnodeT; 3474 fiterT end = f->coeffs.end(); 3475 ProcessID me = world.rank(); 3476 for (fiterT it=f->coeffs.begin(); it!=end; ++it) { 3477 const fnodeT& node = it->second; 3478 if (node.has_coeff()) { 3479 const keyT& key = it->first; 3480 const Tensor<R>& c = node.coeff().full_tensor_copy(); 3481 woT::task(me, &implT:: template apply_1d_realspace_push_op<opT,R>, 3482 archive::archive_ptr<const opT>(&op), axis, key, c); 3483 } 3484 } 3485 if (fence) world.gop.fence(); 3486 } 3487 3488 void forward_do_diff1(const DerivativeBase<T,NDIM>* D, 3489 const implT* f, 3490 const keyT& key, 3491 const std::pair<keyT,coeffT>& left, 3492 const std::pair<keyT,coeffT>& center, 3493 const std::pair<keyT,coeffT>& right); 3494 3495 void do_diff1(const DerivativeBase<T,NDIM>* D, 3496 const implT* f, 3497 const keyT& key, 3498 const std::pair<keyT,coeffT>& left, 3499 const std::pair<keyT,coeffT>& center, 3500 const std::pair<keyT,coeffT>& right); 3501 3502 // Called by result function to differentiate f 
3503 void diff(const DerivativeBase<T,NDIM>* D, const implT* f, bool fence); 3504 3505 /// Returns key of general neighbor enforcing BC 3506 3507 /// Out of volume keys are mapped to enforce the BC as follows. 3508 /// * Periodic BC map back into the volume and return the correct key 3509 /// * Zero BC - returns invalid() to indicate out of volume 3510 keyT neighbor(const keyT& key, const keyT& disp, const std::vector<bool>& is_periodic) const; 3511 3512 /// find_me. Called by diff_bdry to get coefficients of boundary function 3513 Future< std::pair<keyT,coeffT> > find_me(const keyT& key) const; 3514 3515 /// return the a std::pair<key, node>, which MUST exist 3516 std::pair<Key<NDIM>,ShallowNode<T,NDIM> > find_datum(keyT key) const; 3517 3518 /// multiply the ket with a one-electron potential rr(1,2)= f(1,2)*g(1) 3519 3520 /// @param[in] val_ket function values of f(1,2) 3521 /// @param[in] val_pot function values of g(1) 3522 /// @param[in] particle if 0 then g(1), if 1 then g(2) 3523 /// @return the resulting function values 3524 coeffT multiply(const coeffT& val_ket, const coeffT& val_pot, int particle) const; 3525 3526 3527 /// given several coefficient tensors, assemble a result tensor 3528 3529 /// the result looks like: (v(1,2) + v(1) + v(2)) |ket(1,2)> 3530 /// or (v(1,2) + v(1) + v(2)) |p(1) p(2)> 3531 /// i.e. coefficients for the ket and coefficients for the two particles are 3532 /// mutually exclusive. All potential terms are optional, just pass in empty coeffs. 
3533 /// @param[in] key the key of the FunctionNode to which these coeffs belong 3534 /// @param[in] coeff_ket coefficients of the ket 3535 /// @param[in] vpotential1 function values of the potential for particle 1 3536 /// @param[in] vpotential2 function values of the potential for particle 2 3537 /// @param[in] veri function values for the 2-particle potential 3538 coeffT assemble_coefficients(const keyT& key, const coeffT& coeff_ket, 3539 const coeffT& vpotential1, const coeffT& vpotential2, 3540 const tensorT& veri) const; 3541 3542 /// given a ket and the 1- and 2-electron potentials, construct the function V phi 3543 3544 /// small memory footstep version of Vphi_op: use the NS form to have information 3545 /// about parent and children to determine if a box is a leaf. This will require 3546 /// compression of the constituent functions, which will lead to more memory usage 3547 /// there, but will avoid oversampling of the result function. 3548 template<typename opT, size_t LDIM> 3549 struct Vphi_op_NS { 3550 randomizeVphi_op_NS3551 bool randomize() const {return true;} 3552 3553 typedef Vphi_op_NS<opT,LDIM> this_type; 3554 typedef CoeffTracker<T,NDIM> ctT; 3555 typedef CoeffTracker<T,LDIM> ctL; 3556 3557 implT* result; ///< where to construct Vphi, no need to track parents 3558 opT leaf_op; ///< deciding if a given FunctionNode will be a leaf node 3559 ctT iaket; ///< the ket of a pair function (exclusive with p1, p2) 3560 ctL iap1, iap2; ///< the particles 1 and 2 (exclusive with ket) 3561 ctL iav1, iav2; ///< potentials for particles 1 and 2 3562 const implT* eri; ///< 2-particle potential, must be on-demand 3563 3564 // ctor Vphi_op_NSVphi_op_NS3565 Vphi_op_NS() {} Vphi_op_NSVphi_op_NS3566 Vphi_op_NS(implT* result, const opT& leaf_op, const ctT& iaket, 3567 const ctL& iap1, const ctL& iap2, const ctL& iav1, const ctL& iav2, 3568 const implT* eri) 3569 : result(result), leaf_op(leaf_op), iaket(iaket), iap1(iap1), iap2(iap2) 3570 , iav1(iav1), iav2(iav2), 
eri(eri) { 3571 3572 // 2-particle potential must be on-demand 3573 if (eri) MADNESS_ASSERT(eri->is_on_demand()); 3574 } 3575 3576 /// make and insert the coefficients into result's tree operatorVphi_op_NS3577 std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const { 3578 3579 3580 if(leaf_op.do_pre_screening()){ 3581 // this means that we only construct the boxes which are leaf boxes from the other function in the leaf_op 3582 if(leaf_op.pre_screening(key)){ 3583 // construct sum_coefficients, insert them and leave 3584 const coeffT sum_coeff=make_sum_coeffs(key); 3585 result->get_coeffs().replace(key,nodeT(sum_coeff,false)); 3586 return std::pair<bool,coeffT> (true,coeffT()); 3587 }else{ 3588 result->get_coeffs().replace(key,nodeT(coeffT(),true)); 3589 return continue_recursion(std::vector<bool>(1<<NDIM,false),tensorT(),key); 3590 } 3591 }else{ 3592 // this means that the function has to be completely constructed and not mirrored by another function 3593 3594 // if the initial level is not reached then this must not be a leaf box 3595 size_t il = result->get_initial_level(); 3596 if(FunctionDefaults<NDIM>::get_refine()) il+=1; 3597 if(key.level()<il){ 3598 //std::cout << "n=" + std::to_string(key.level()) + " below initial level " + std::to_string(result->get_initial_level()) + "\n"; 3599 // insert empty coeffs for this box and send off jobs for the children 3600 result->get_coeffs().replace(key,nodeT(coeffT(),true)); 3601 return continue_recursion(std::vector<bool>(1<<NDIM,false),tensorT(),key); 3602 } 3603 // if further refinement is needed (because we are at a special box, special point) 3604 // and the special_level is not reached then this must not be a leaf box 3605 if(key.level()<result->get_special_level() and leaf_op.special_refinement_needed(key)){ 3606 //std::cout << "special refinement for n=" + std::to_string(key.level()) + "\n"; 3607 // insert empty coeffs for this box and send off jobs for the children 3608 
result->get_coeffs().replace(key,nodeT(coeffT(),true)); 3609 return continue_recursion(std::vector<bool>(1<<NDIM,false),tensorT(),key); 3610 } 3611 3612 3613 coeffT sum_coeff=make_sum_coeffs(key); 3614 3615 if(leaf_op.post_screening(key,sum_coeff)){ 3616 result->get_coeffs().replace(key,nodeT(sum_coeff,false)); 3617 //std::cout << "n=" + std::to_string(key.level()) + " is leaf by post_screening\n"; 3618 return std::pair<bool,coeffT> (true,coeffT()); 3619 } 3620 3621 // do the conventional error-measurement and use the computed child coeffs to determine if they will be leafs 3622 tensorT children_sum_coeffs=make_childrens_sum_coeffs(key); 3623 tensorT d=result->filter(children_sum_coeffs); 3624 3625 // since they will be better anyway (those are only the s coeffs, not the d) 3626 sum_coeff=coeffT(copy(d(result->get_cdata().s0)),result->get_tensor_args()); 3627 3628 // delte s coeffs from the children tensor to get the d coeffs and the error 3629 d(result->get_cdata().s0)=0.0; 3630 double error=d.normf(); 3631 3632 if(error<result->truncate_tol(result->get_thresh(),key)){ 3633 result->get_coeffs().replace(key,nodeT(sum_coeff,false)); 3634 //std::cout << "n=" + std::to_string(key.level()) + " is leaf by conventional error measurement (" + std::to_string(error)+ ")\n"; 3635 return std::pair<bool,coeffT> (true,coeffT()); 3636 } 3637 3638 // at this point the current box will not become a leaf anymore, but we can pre-screen the chidren 3639 // if no screening is enabled then the child boxes are compared to the downsampled parent boxes, if they are represented well they will be leaves 3640 std::vector<bool> child_is_leaf(1<<NDIM); 3641 std::size_t i=0; 3642 for (KeyChildIterator<NDIM> kit(key); kit; ++kit, ++i) { 3643 // post-determination for this child's coeffs 3644 coeffT child_coeff=coeffT(copy(children_sum_coeffs(result->child_patch(kit.key()))), 3645 result->get_tensor_args()); 3646 child_is_leaf[i]=leaf_op.post_screening(kit.key(),child_coeff); 3647 3648 // compare 
to parent sum coeffs (failsafe) 3649 child_is_leaf[i]=(child_is_leaf[i] or leaf_op.compare_to_parent(kit.key(),child_coeff,sum_coeff)); 3650 } 3651 // insert empty sum coeffs for this box and 3652 // send off the tasks for those children that might not be leaves; 3653 result->get_coeffs().replace(key,nodeT(coeffT(),true)); 3654 //std::cout << "n=" + std::to_string(key.level()) + " is no leaf\n"; 3655 return continue_recursion(child_is_leaf,children_sum_coeffs,key); 3656 } 3657 3658 MADNESS_EXCEPTION("you should not be here",1); 3659 return std::pair<bool,coeffT> (true,coeffT()); 3660 } 3661 3662 3663 /// loop over all children and either insert their sum coeffs or continue the recursion 3664 3665 /// @param[in] child_is_leaf for each child: is it a leaf? 3666 /// @param[in] coeffs coefficient tensor with 2^N sum coeffs (=unfiltered NS coeffs) 3667 /// @param[in] key the key for the NS coeffs (=parent key of the children) 3668 /// @return to avoid recursion outside this return: std::pair<is_leaf,coeff> = true,coeffT() continue_recursionVphi_op_NS3669 std::pair<bool,coeffT> continue_recursion(const std::vector<bool> child_is_leaf, 3670 const tensorT& coeffs, const keyT& key) const { 3671 std::size_t i=0; 3672 for (KeyChildIterator<NDIM> kit(key); kit; ++kit, ++i) { 3673 keyT child=kit.key(); 3674 bool is_leaf=child_is_leaf[i]; 3675 3676 if (is_leaf) { 3677 // insert the sum coeffs 3678 insert_op<T,NDIM> iop(result); 3679 iop(child,coeffT(copy(coeffs(result->child_patch(child))),result->get_tensor_args()),is_leaf); 3680 } else { 3681 this_type child_op=this->make_child(child); 3682 noop<T,NDIM> no; 3683 // spawn activation where child is local 3684 ProcessID p=result->get_coeffs().owner(child); 3685 3686 void (implT::*ft)(const Vphi_op_NS<opT,LDIM>&, const noop<T,NDIM>&, const keyT&) const = &implT:: template forward_traverse< Vphi_op_NS<opT,LDIM>, noop<T,NDIM> >; 3687 result->task(p, ft, child_op, no, child); 3688 } 3689 } 3690 // return e sum coeffs; also return 
always is_leaf=true: 3691 // the recursion is continued within this struct, not outside in traverse_tree! 3692 return std::pair<bool,coeffT> (true,coeffT()); 3693 } 3694 3695 /// return the values of the 2-particle potential 3696 3697 /// @param[in] key the key for which the values are requested 3698 /// @return val_eri the values in full tensor form eri_valuesVphi_op_NS3699 tensorT eri_values(const keyT& key) const { 3700 tensorT val_eri; 3701 if (eri and eri->is_on_demand()) { 3702 if (eri->get_functor()->provides_coeff()) { 3703 val_eri=eri->coeffs2values( 3704 key,eri->get_functor()->coeff(key).full_tensor()); 3705 } else { 3706 val_eri=tensorT(eri->cdata.vk); 3707 eri->fcube(key,*(eri->get_functor()),eri->cdata.quad_x,val_eri); 3708 } 3709 } 3710 return val_eri; 3711 } 3712 3713 /// make the sum coeffs for key make_sum_coeffsVphi_op_NS3714 coeffT make_sum_coeffs(const keyT& key) const { 3715 // break key into particles 3716 Key<LDIM> key1, key2; 3717 key.break_apart(key1,key2); 3718 3719 TensorArgs targs=result->get_tensor_args(); 3720 // use the ket coeffs if they are there, or make them by hartree product 3721 const coeffT coeff_ket_NS = (iaket.get_impl()) 3722 ? 
iaket.coeff(key) 3723 : outer(iap1.coeff(key1),iap2.coeff(key2),targs); 3724 3725 coeffT val_potential1, val_potential2; 3726 if (iav1.get_impl()) { 3727 coeffT tmp=iav1.coeff(key1)(iav1.get_impl()->get_cdata().s0); 3728 val_potential1=iav1.get_impl()->coeffs2values(key1,tmp); 3729 } 3730 if (iav2.get_impl()) { 3731 coeffT tmp=iav2.coeff(key2)(iav2.get_impl()->get_cdata().s0); 3732 val_potential2=iav2.get_impl()->coeffs2values(key2,tmp); 3733 } 3734 coeffT tmp=coeff_ket_NS(result->get_cdata().s0); 3735 3736 return result->assemble_coefficients(key,tmp, 3737 val_potential1,val_potential2,eri_values(key)); 3738 } 3739 3740 /// make the sum coeffs for all children of key make_childrens_sum_coeffsVphi_op_NS3741 tensorT make_childrens_sum_coeffs(const keyT& key) const { 3742 // break key into particles 3743 Key<LDIM> key1, key2; 3744 key.break_apart(key1,key2); 3745 TensorArgs targs=result->get_tensor_args(); 3746 3747 // use the ket coeffs if they are there, or make them by hartree product 3748 const coeffT coeff_ket_NS = (iaket.get_impl()) 3749 ? iaket.coeff(key) 3750 : outer(iap1.coeff(key1),iap2.coeff(key2),targs); 3751 3752 // get the sum coeffs for all children 3753 const coeffT coeff_ket_unfiltered=result->unfilter(coeff_ket_NS); 3754 const coeffT coeff_v1_unfiltered=(iav1.get_impl()) 3755 ? iav1.get_impl()->unfilter(iav1.coeff(key1)) : coeffT(); 3756 const coeffT coeff_v2_unfiltered=(iav2.get_impl()) 3757 ? 
iav2.get_impl()->unfilter(iav2.coeff(key2)) : coeffT(); 3758 3759 // result sum coeffs of all children 3760 tensorT s_coeffs(result->cdata.v2k); 3761 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 3762 3763 // break key into particles 3764 Key<LDIM> child1, child2; 3765 kit.key().break_apart(child1,child2); 3766 3767 // make the values of the potentials for each child 3768 // transform the child patch of s coeffs to values 3769 coeffT val_potential1, val_potential2; 3770 if (iav1.get_impl()) { 3771 coeffT tmp=coeff_v1_unfiltered(iav1.get_impl()->child_patch(child1)); 3772 val_potential1=iav1.get_impl()->coeffs2values(child1,tmp); 3773 } 3774 if (iav2.get_impl()) { 3775 coeffT tmp=coeff_v2_unfiltered(iav2.get_impl()->child_patch(child2)); 3776 val_potential2=iav2.get_impl()->coeffs2values(child2,tmp); 3777 } 3778 const coeffT coeff_ket=coeff_ket_unfiltered(result->child_patch(kit.key())); 3779 3780 // the sum coeffs for this child 3781 const tensorT val_eri=eri_values(kit.key()); 3782 const coeffT coeff_result=result->assemble_coefficients(kit.key(),coeff_ket, 3783 val_potential1,val_potential2,val_eri); 3784 3785 // accumulate the sum coeffs of the children here 3786 s_coeffs(result->child_patch(kit.key()))+=coeff_result.full_tensor_copy(); 3787 } 3788 return s_coeffs; 3789 3790 } 3791 make_childVphi_op_NS3792 this_type make_child(const keyT& child) const { 3793 3794 // break key into particles 3795 Key<LDIM> key1, key2; 3796 child.break_apart(key1,key2); 3797 3798 return this_type(result,leaf_op,iaket.make_child(child), 3799 iap1.make_child(key1),iap2.make_child(key2), 3800 iav1.make_child(key1),iav2.make_child(key2),eri); 3801 } 3802 activateVphi_op_NS3803 Future<this_type> activate() const { 3804 Future<ctT> iaket1=iaket.activate(); 3805 Future<ctL> iap11=iap1.activate(); 3806 Future<ctL> iap21=iap2.activate(); 3807 Future<ctL> iav11=iav1.activate(); 3808 Future<ctL> iav21=iav2.activate(); 3809 return 
result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this), 3810 &this_type::forward_ctor),result,leaf_op, 3811 iaket1,iap11,iap21,iav11,iav21,eri); 3812 } 3813 forward_ctorVphi_op_NS3814 this_type forward_ctor(implT* result1, const opT& leaf_op, const ctT& iaket1, 3815 const ctL& iap11, const ctL& iap21, const ctL& iav11, const ctL& iav21, 3816 const implT* eri1) { 3817 return this_type(result1,leaf_op,iaket1,iap11,iap21,iav11,iav21,eri1); 3818 } 3819 3820 /// serialize this (needed for use in recursive_op) serializeVphi_op_NS3821 template <typename Archive> void serialize(const Archive& ar) { 3822 ar & iaket & eri & result & leaf_op & iap1 & iap2 & iav1 & iav2; 3823 } 3824 }; 3825 3826 /// assemble the function V*phi using V and phi given from the functor 3827 3828 /// this function must have been constructed using the CompositeFunctorInterface. 3829 /// The interface provides one- and two-electron potentials, and the ket, which are 3830 /// assembled to give V*phi. 3831 /// @param[in] leaf_op operator to decide if a given node is a leaf node 3832 /// @param[in] fence global fence 3833 template<typename opT> 3834 void make_Vphi(const opT& leaf_op, const bool fence=true) { 3835 3836 const size_t LDIM=3; 3837 3838 // keep the functor available, but remove it from the result 3839 // result will return false upon is_on_demand(), which is necessary for the 3840 // CoeffTracker to track the parent coeffs correctly for error_leaf_op 3841 std::shared_ptr< FunctionFunctorInterface<T,NDIM> > func2(this->get_functor()); 3842 this->unset_functor(); 3843 3844 CompositeFunctorInterface<T,NDIM,LDIM>* func= 3845 dynamic_cast<CompositeFunctorInterface<T,NDIM,LDIM>* >(&(*func2)); 3846 MADNESS_ASSERT(func); 3847 3848 coeffs.clear(); 3849 const keyT& key0=cdata.key0; 3850 3851 3852 FunctionImpl<T,NDIM>* ket=func->impl_ket.get(); 3853 const FunctionImpl<T,NDIM>* eri=func->impl_eri.get(); 3854 FunctionImpl<T,LDIM>* v1=func->impl_m1.get(); 3855 FunctionImpl<T,LDIM>* 
v2=func->impl_m2.get(); 3856 FunctionImpl<T,LDIM>* p1=func->impl_p1.get(); 3857 FunctionImpl<T,LDIM>* p2=func->impl_p2.get(); 3858 3859 if (ket) ket->undo_redundant(false); 3860 if (v1) v1->undo_redundant(false); 3861 if (v2) v2->undo_redundant(false); 3862 if (p1) p1->undo_redundant(false); 3863 if (p2) p2->undo_redundant(false); // fence here 3864 world.gop.fence(); 3865 3866 if (ket) ket->compress(true,true,false,false); 3867 if (v1) v1->compress(true,true,false,false); 3868 if (v2) v2->compress(true,true,false,false); 3869 if (p1) p1->compress(true,true,false,false); 3870 if (p2) p2->compress(true,true,false,false); // fence here 3871 world.gop.fence(); 3872 small=0; 3873 large=0; 3874 3875 if (world.rank() == coeffs.owner(key0)) { 3876 3877 // insert an empty internal node for comparison 3878 this->coeffs.replace(key0,nodeT(coeffT(),true)); 3879 3880 // prepare the CoeffTracker 3881 CoeffTracker<T,NDIM> iaket(ket); 3882 CoeffTracker<T,LDIM> iap1(p1); 3883 CoeffTracker<T,LDIM> iap2(p2); 3884 CoeffTracker<T,LDIM> iav1(v1); 3885 CoeffTracker<T,LDIM> iav2(v2); 3886 3887 // the operator making the coefficients 3888 typedef Vphi_op_NS<opT,LDIM> coeff_opT; 3889 coeff_opT coeff_op(this,leaf_op,iaket,iap1,iap2,iav1,iav2,eri); 3890 3891 // this operator simply inserts the coeffs into this' tree 3892 typedef noop<T,NDIM> apply_opT; 3893 apply_opT apply_op; 3894 3895 woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>, 3896 coeff_op, apply_op, cdata.key0); 3897 } 3898 3899 world.gop.fence(); 3900 3901 // remove internal coefficients 3902 this->redundant=true; 3903 this->undo_redundant(false); 3904 3905 // set right state 3906 this->compressed=false; 3907 this->on_demand=false; 3908 this->redundant=false; 3909 this->nonstandard=false; 3910 if (fence) world.gop.fence(); 3911 3912 } 3913 3914 3915 3916 /// Permute the dimensions of f according to map, result on this 3917 void mapdim(const implT& f, const std::vector<long>& map, bool fence); 3918 
/// take the average of two functions, similar to: this=0.5*(this+rhs)

/// works in either basis and also in nonstandard form
void average(const implT& rhs);

/// change the tensor type of the coefficients in the FunctionNode

/// @param[in]  targs   target tensor arguments (threshold and full/low rank)
void change_tensor_type1(const TensorArgs& targs, bool fence);

/// reduce the rank of the coefficients tensors

/// @param[in]  targs   target tensor arguments (threshold and full/low rank)
void reduce_rank(const TensorArgs& targs, bool fence);

// evaluate the function at point x (simulation coordinates) given the
// scaling-function coefficients c of the box at level n containing x
T eval_cube(Level n, coordT& x, const tensorT& c) const;

/// Transform sum coefficients at level n to sums+differences at level n-1

/// Given scaling function coefficients s[n][l][i] and s[n][l+1][i]
/// return the scaling function and wavelet coefficients at the
/// coarser level.  I.e., decompose Vn using Vn = Vn-1 + Wn-1.
/// \code
/// s_i = sum(j) h0_ij*s0_j + h1_ij*s1_j
/// d_i = sum(j) g0_ij*s0_j + g1_ij*s1_j
/// \endcode
/// Returns a new tensor and has no side effects.  Works for any
/// number of dimensions.
///
/// No communication involved.
tensorT filter(const tensorT& s) const;

// low-rank (GenTensor) overload of filter
coeffT filter(const coeffT& s) const;

/// Transform sums+differences at level n to sum coefficients at level n+1

/// Given scaling function and wavelet coefficients (s and d)
/// returns the scaling function coefficients at the next finer
/// level.  I.e., reconstruct Vn using Vn = Vn-1 + Wn-1.
/// \code
/// s0 = sum(j) h0_ji*s_j + g0_ji*d_j
/// s1 = sum(j) h1_ji*s_j + g1_ji*d_j
/// \endcode
/// Returns a new tensor and has no side effects
///
/// If (sonly) ... then ss is only the scaling function coeff (and
/// assume the d are zero).  Works for any number of dimensions.
///
/// No communication involved.
tensorT unfilter(const tensorT& s) const;

// low-rank (GenTensor) overload of unfilter
coeffT unfilter(const coeffT& s) const;

/// downsample the sum coefficients of level n+1 to sum coeffs on level n

/// specialization of the filter method, will yield only the sum coefficients
/// @param[in]  key key of level n
/// @param[in]  v   vector of sum coefficients of level n+1
/// @return     sum coefficients on level n in full tensor format
tensorT downsample(const keyT& key, const std::vector< Future<coeffT > >& v) const;

/// upsample the sum coefficients of level 1 to sum coeffs on level n+1

/// specialization of the unfilter method, will transform only the sum coefficients
/// @param[in]  key     key of level n+1
/// @param[in]  coeff   sum coefficients of level n (does NOT belong to key!!)
/// @return     sum coefficients on level n+1
coeffT upsample(const keyT& key, const coeffT& coeff) const;

/// Projects old function into new basis (only in reconstructed form)
void project(const implT& old, bool fence);

/// refinement predicate that accepts every node (used to force refinement)
struct true_refine_test {
    bool operator()(const implT* f, const keyT& key, const nodeT& t) const {
        return true;
    }
    // stateless, so nothing to serialize
    template <typename Archive> void serialize(Archive& ar) {}
};

/// refine a single node in place if the predicate op approves

/// replaces the node's sum coefficients by the unfiltered (reconstructed)
/// children coefficients and inserts the eight/2^NDIM child nodes
template <typename opT>
void refine_op(const opT& op, const keyT& key) {
    // Must allow for someone already having autorefined the coeffs
    // and we get a write accessor just in case they are already executing
    typename dcT::accessor acc;
    const auto found = coeffs.find(acc,key);
    MADNESS_ASSERT(found);
    nodeT& node = acc->second;
    if (node.has_coeff() && key.level() < max_refine_level && op(this, key, node)) {
        coeffT d(cdata.v2k,targs);
        d(cdata.s0) += copy(node.coeff());
        d = unfilter(d);
        node.clear_coeff();
        node.set_has_children(true);
        for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
            const keyT& child = kit.key();
            coeffT ss = copy(d(child_patch(child)));
            ss.reduce_rank(targs.thresh);
            // coeffs.replace(child,nodeT(ss,-1.0,false).node_to_low_rank());
            coeffs.replace(child,nodeT(ss,-1.0,false));
            // Note value -1.0 for norm tree to indicate result of refinement
        }
    }
}

/// walk down the tree spawning refine_op tasks on the leaves

/// interior nodes recurse to their children on the owning process;
/// leaf nodes get a refine_op task on their owner
template <typename opT>
void refine_spawn(const opT& op, const keyT& key) {
    nodeT& node = coeffs.find(key).get()->second;
    if (node.has_children()) {
        for (KeyChildIterator<NDIM> kit(key); kit; ++kit)
            woT::task(coeffs.owner(kit.key()), &implT:: template refine_spawn<opT>, op, kit.key(), TaskAttributes::hipri());
    }
    else {
        woT::task(coeffs.owner(key), &implT:: template refine_op<opT>, op, key);
    }
}

// Refine in real space according to local user-defined criterion
template <typename opT>
void refine(const opT& op, bool fence) {
    // only the owner of the root key starts the recursion
    if (world.rank() == coeffs.owner(cdata.key0))
        woT::task(coeffs.owner(cdata.key0), &implT:: template refine_spawn<opT>, op, cdata.key0, TaskAttributes::hipri());
    if (fence)
        world.gop.fence();
}

// true if node for key exists locally and has children
bool exists_and_has_children(const keyT& key) const;

// true if node for key exists locally and is a leaf
bool exists_and_is_leaf(const keyT& key) const;

// decides whether to widen the tree at key given the leaf-status of the neighbors in v
void broaden_op(const keyT& key, const std::vector< Future <bool> >& v);

// For each local node sets value of norm tree to 0.0
void zero_norm_tree();

// Broaden tree
void broaden(std::vector<bool> is_periodic, bool fence);

/// sum all the contributions from all scales after applying an operator in mod-NS form
void trickle_down(bool fence);

/// sum all the contributions from all scales after applying an operator in mod-NS form

/// cf reconstruct_op
void trickle_down_op(const keyT& key, const coeffT& s);
// convert to reconstructed (scaling-function) form
void reconstruct(bool fence);

// Invoked on node where key is local
// void reconstruct_op(const keyT& key, const tensorT& s);
void reconstruct_op(const keyT& key, const coeffT& s);

/// compress the wave function

/// after application there will be sum coefficients at the root level,
/// and difference coefficients at all other levels; furthermore:
/// @param[in]  nonstandard     keep sum coeffs at all other levels, except leaves
/// @param[in]  keepleaves      keep sum coeffs (but no diff coeffs) at leaves
/// @param[in]  redundant       keep only sum coeffs at all levels, discard difference coeffs
void compress(bool nonstandard, bool keepleaves, bool redundant, bool fence);

// Invoked on node where key is local
Future<coeffT > compress_spawn(const keyT& key, bool nonstandard, bool keepleaves, bool redundant);

/// convert this to redundant, i.e. have sum coefficients on all levels
void make_redundant(const bool fence);

/// convert this from redundant to standard reconstructed form
void undo_redundant(const bool fence);


/// compute for each FunctionNode the norm of the function inside that node
void norm_tree(bool fence);

// reduction step of norm_tree: combine the child norms in v with the local diff norm
double norm_tree_op(const keyT& key, const std::vector< Future<double> >& v);

// recursive spawn of norm_tree_op over the tree; must be invoked where key is local
Future<double> norm_tree_spawn(const keyT& key);

/// truncate using a tree in reconstructed form

/// must be invoked where key is local
Future<coeffT> truncate_reconstructed_spawn(const keyT& key, const double tol);

/// given the sum coefficients of all children, truncate or not

/// @return     new sum coefficients (empty if internal, not empty, if new leaf); might delete its children
coeffT truncate_reconstructed_op(const keyT& key, const std::vector< Future<coeffT > >& v, const double tol);

/// calculate the wavelet coefficients using the sum coefficients of all child nodes

/// @param[in]  key         this's key
/// @param[in]  v           sum coefficients of the child nodes
/// @param[in]  nonstandard keep the sum coefficients with the wavelet coefficients
/// @param[in]  redundant   keep only the sum coefficients, discard the wavelet coefficients
/// @return     the sum coefficients
coeffT compress_op(const keyT& key, const std::vector< Future<coeffT > >& v, bool nonstandard, bool redundant);


/// similar to compress_op, but insert only the sum coefficients in the tree

/// @param[in]  key     this's key
/// @param[in]  v       sum coefficients of the child nodes
/// @return     the sum coefficients
coeffT make_redundant_op(const keyT& key, const std::vector< Future<coeffT > >& v);

/// Changes non-standard compressed form to standard compressed form
void standard(bool fence);

/// Changes non-standard compressed form to standard compressed form

/// functor for the local rank-reduction pass over all nodes: interior nodes
/// get their scaling coefficients zeroed, leaf nodes lose all coefficients
struct do_standard {
    typedef Range<typename dcT::iterator> rangeT;

    // back-pointer to the owning impl (provides cdata.s0 and targs.thresh)
    implT* impl;

    // default ctor needed for serialization/task machinery
    do_standard() {}
    do_standard(implT* impl) : impl(impl) {}

    // process one local node; always returns true so the range reduction continues
    bool operator()(typename rangeT::iterator& it) const {

        const keyT& key = it->first;
        nodeT& node = it->second;
        if (key.level()> 0 && node.has_coeff()) {
            if (node.has_children()) {
                // Zero out scaling coeffs
                node.coeff()(impl->cdata.s0)=0.0;
                node.reduceRank(impl->targs.thresh);
            } else {
                // Deleting both scaling and wavelet coeffs
                node.clear_coeff();
            }
        }
        return true;
    }
    // local-only functor: must never be shipped across processes
    template <typename Archive> void serialize(const Archive& ar) {
        MADNESS_EXCEPTION("no serialization of do_standard",1);
    }
};


/// laziness

/// plain-old-data bundle of the arguments for a deferred operator application
template<size_t OPDIM>
struct do_op_args {
    Key<OPDIM> key,d;       // source key and displacement (operator dimensions)
    keyT dest;              // destination key in this function's tree
    double tol, fac, cnorm; // tolerance, expected #contributions, norm of source coeffs
    do_op_args() {}
    do_op_args(const Key<OPDIM>& key, const Key<OPDIM>& d, const keyT& dest, double tol, double fac, double cnorm)
        : key(key), d(d), dest(dest), tol(tol), fac(fac), cnorm(cnorm) {}
    template <class Archive>
    void serialize(Archive& ar) {
        // POD-style bitwise serialization of the whole struct
        ar & archive::wrap_opaque(this,1);
    }
};

/// for fine-grain parallelism: call the apply method of an operator in a separate task

/// @param[in]  op      the operator working on our function
/// @param[in]  c       full rank tensor holding the NS coefficients
/// @param[in]  args    laziness holding norm of the coefficients, displacement, destination, ..
template <typename opT, typename R, size_t OPDIM>
void do_apply_kernel(const opT* op, const Tensor<R>& c, const do_op_args<OPDIM>& args) {

    tensorT result = op->apply(args.key, args.d, c, args.tol/args.fac/args.cnorm);

    // Screen here to reduce communication cost of negligible data
    // and also to ensure we don't needlessly widen the tree when
    // applying the operator
    if (result.normf()> 0.3*args.tol/args.fac) {
        Future<double> time=coeffs.task(args.dest, &nodeT::accumulate2, result, coeffs, args.dest, TaskAttributes::hipri());
        //woT::task(world.rank(),&implT::accumulate_timer,time,TaskAttributes::hipri());
        // UGLY BUT ADDED THE OPTIMIZATION BACK IN HERE EXPLICITLY/
        if (args.dest == world.rank()) {
            // local destination: send short-circuits the task queue
            coeffs.send(args.dest, &nodeT::accumulate, result, coeffs, args.dest);
        }
        else {
            coeffs.task(args.dest, &nodeT::accumulate, result, coeffs, args.dest, TaskAttributes::hipri());
        }
    }
}

/// same as do_apply_kernel, but use full rank tensors as input and low rank tensors as output

/// @param[in]  op          the operator working on our function
/// @param[in]  c           full rank tensor holding the NS coefficients
/// @param[in]  args        laziness holding norm of the coefficients, displacement, destination, ..
/// @param[in]  apply_targs TensorArgs with tightened threshold for accumulation
/// @return     nothing, but accumulate the result tensor into the destination node
template <typename opT, typename R, size_t OPDIM>
double do_apply_kernel2(const opT* op, const Tensor<R>& c, const do_op_args<OPDIM>& args,
        const TensorArgs& apply_targs) {

    tensorT result_full = op->apply(args.key, args.d, c, args.tol/args.fac/args.cnorm);
    const double norm=result_full.normf();

    // Screen here to reduce communication cost of negligible data
    // and also to ensure we don't needlessly widen the tree when
    // applying the operator
    // OPTIMIZATION NEEDED HERE ... CHANGING THIS TO TASK NOT SEND REMOVED
    // BUILTIN OPTIMIZATION TO SHORTCIRCUIT MSG IF DATA IS LOCAL
    if (norm > 0.3*args.tol/args.fac) {

        small++;
        //double cpu0=cpu_time();
        // convert the full-rank result to the requested (low-rank) format
        coeffT result=coeffT(result_full,apply_targs);
        MADNESS_ASSERT(result.tensor_type()==TT_FULL or result.tensor_type()==TT_2D);
        //double cpu1=cpu_time();
        //timer_lr_result.accumulate(cpu1-cpu0);

        Future<double> time=coeffs.task(args.dest, &nodeT::accumulate, result, coeffs, args.dest, apply_targs,
                TaskAttributes::hipri());

        //woT::task(world.rank(),&implT::accumulate_timer,time,TaskAttributes::hipri());
    }
    return norm;
}



/// same as do_apply_kernel2, but use low rank tensors as input and low rank tensors as output

/// @param[in]  op          the operator working on our function
/// @param[in]  coeff       full rank tensor holding the NS coefficients
/// @param[in]  args        laziness holding norm of the coefficients, displacement, destination, ..
/// @param[in]  apply_targs TensorArgs with tightened threshold for accumulation
/// @return     nothing, but accumulate the result tensor into the destination node
template <typename opT, typename R, size_t OPDIM>
double do_apply_kernel3(const opT* op, const GenTensor<R>& coeff, const do_op_args<OPDIM>& args,
        const TensorArgs& apply_targs) {

    coeffT result;
    // dispatch on whether the operator acts on one particle (2*OPDIM==NDIM) or on all dims
    if (2*OPDIM==NDIM) result= op->apply2_lowdim(args.key, args.d, coeff,
            args.tol/args.fac/args.cnorm, args.tol/args.fac);
    if (OPDIM==NDIM) result = op->apply2(args.key, args.d, coeff,
            args.tol/args.fac/args.cnorm, args.tol/args.fac);

    const double result_norm=result.svd_normf();

    if (result_norm> 0.3*args.tol/args.fac) {
        small++;

        double cpu0=cpu_time();
        if (targs.tt!=result.tensor_type()) result=result.convert(targs);
        double cpu1=cpu_time();
        timer_lr_result.accumulate(cpu1-cpu0);

        // accumulate also expects result in SVD form
        Future<double> time=coeffs.task(args.dest, &nodeT::accumulate, result, coeffs, args.dest, apply_targs,
                TaskAttributes::hipri());
        woT::task(world.rank(),&implT::accumulate_timer,time,TaskAttributes::hipri());

    }
    return result_norm;

}

// volume of n-dimensional sphere of radius R
double vol_nsphere(int n, double R) {
    return std::pow(madness::constants::pi,n*0.5)*std::pow(R,n)/std::tgamma(1+0.5*n);
}


/// apply an operator on the coeffs c (at node key)

/// the result is accumulated inplace to this's tree at various FunctionNodes
/// @param[in]  op      the operator to act on the source function
/// @param[in]  key     key of the source FunctionNode of f which is processed
/// @param[in]  c       coeffs of the FunctionNode of f which is processed
4290 template <typename opT, typename R> do_apply(const opT * op,const keyT & key,const Tensor<R> & c)4291 void do_apply(const opT* op, const keyT& key, const Tensor<R>& c) { 4292 PROFILE_MEMBER_FUNC(FunctionImpl); 4293 4294 // working assumption here WAS that the operator is 4295 // isotropic and montonically decreasing with distance 4296 // ... however, now we are using derivative Gaussian 4297 // expansions (and also non-cubic boxes) isotropic is 4298 // violated. While not strictly monotonically decreasing, 4299 // the derivative gaussian is still such that once it 4300 // becomes negligible we are in the asymptotic region. 4301 4302 typedef typename opT::keyT opkeyT; 4303 static const size_t opdim=opT::opdim; 4304 const opkeyT source=op->get_source_key(key); 4305 4306 4307 // Tuning here is based on observation that with 4308 // sufficiently high-order wavelet relative to the 4309 // precision, that only nearest neighbor boxes contribute, 4310 // whereas for low-order wavelets more neighbors will 4311 // contribute. Sufficiently high is picked as 4312 // k>=2-log10(eps) which is our empirical rule for 4313 // efficiency/accuracy and code instrumentation has 4314 // previously indicated that (in 3D) just unit 4315 // displacements are invoked. The error decays as R^-(k+1), 4316 // and the number of boxes increases as R^d. 
4317 // 4318 // Fac is the expected number of contributions to a given 4319 // box, so the error permitted per contribution will be 4320 // tol/fac 4321 4322 // radius of shell (nearest neighbor is diameter of 3 boxes, so radius=1.5) 4323 double radius = 1.5 + 0.33*std::max(0.0,2-std::log10(thresh)-k); // 0.33 was 0.5 4324 double fac = vol_nsphere(NDIM, radius); 4325 //previously fac=10.0 selected empirically constrained by qmprop 4326 4327 double cnorm = c.normf(); 4328 4329 const std::vector<opkeyT>& disp = op->get_disp(key.level()); // list of displacements sorted in orer of increasing distance 4330 const std::vector<bool> is_periodic(NDIM,false); // Periodic sum is already done when making rnlp 4331 int ndone=1; // Counts #done at each distance 4332 uint64_t distsq = 99999999999999; 4333 for (typename std::vector<opkeyT>::const_iterator it=disp.begin(); it != disp.end(); ++it) { 4334 keyT d; 4335 Key<NDIM-opdim> nullkey(key.level()); 4336 if (op->particle()==1) d=it->merge_with(nullkey); 4337 if (op->particle()==2) d=nullkey.merge_with(*it); 4338 4339 uint64_t dsq = d.distsq(); 4340 if (dsq != distsq) { // Moved to next shell of neighbors 4341 if (ndone == 0 && dsq > 1) { 4342 // Have at least done the input box and all first 4343 // nearest neighbors, and for all of the last set 4344 // of neighbors had no contribution. Thus, 4345 // assuming monotonic decrease, we are done. 
4346 break; 4347 } 4348 ndone = 0; 4349 distsq = dsq; 4350 } 4351 4352 keyT dest = neighbor(key, d, is_periodic); 4353 if (dest.is_valid()) { 4354 double opnorm = op->norm(key.level(), *it, source); 4355 double tol = truncate_tol(thresh, key); 4356 4357 if (cnorm*opnorm> tol/fac) { 4358 ndone++; 4359 tensorT result = op->apply(source, *it, c, tol/fac/cnorm); 4360 if (result.normf() > 0.3*tol/fac) { 4361 if (coeffs.is_local(dest)) 4362 coeffs.send(dest, &nodeT::accumulate2, result, coeffs, dest); 4363 else 4364 coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest); 4365 } 4366 } 4367 } 4368 } 4369 } 4370 4371 4372 /// apply an operator on f to return this 4373 template <typename opT, typename R> apply(opT & op,const FunctionImpl<R,NDIM> & f,bool fence)4374 void apply(opT& op, const FunctionImpl<R,NDIM>& f, bool fence) { 4375 PROFILE_MEMBER_FUNC(FunctionImpl); 4376 MADNESS_ASSERT(!op.modified()); 4377 typename dcT::const_iterator end = f.coeffs.end(); 4378 for (typename dcT::const_iterator it=f.coeffs.begin(); it!=end; ++it) { 4379 // looping through all the coefficients in the source 4380 const keyT& key = it->first; 4381 const FunctionNode<R,NDIM>& node = it->second; 4382 if (node.has_coeff()) { 4383 if (node.coeff().dim(0) != k || op.doleaves) { 4384 ProcessID p = FunctionDefaults<NDIM>::get_apply_randomize() ? world.random_proc() : coeffs.owner(key); 4385 // woT::task(p, &implT:: template do_apply<opT,R>, &op, key, node.coeff()); //.full_tensor_copy() ????? why copy ???? 
4386 woT::task(p, &implT:: template do_apply<opT,R>, &op, key, node.coeff().reconstruct_tensor()); 4387 } 4388 } 4389 } 4390 if (fence) 4391 world.gop.fence(); 4392 4393 this->compressed=true; 4394 this->nonstandard=true; 4395 this->redundant=false; 4396 4397 } 4398 4399 4400 4401 /// apply an operator on the coeffs c (at node key) 4402 4403 /// invoked by result; the result is accumulated inplace to this's tree at various FunctionNodes 4404 /// @param[in] op the operator to act on the source function 4405 /// @param[in] key key of the source FunctionNode of f which is processed (see "source") 4406 /// @param[in] coeff coeffs of FunctionNode being processed 4407 /// @param[in] do_kernel true: do the 0-disp only; false: do everything but the kernel 4408 /// @return max norm, and will modify or include new nodes in this' tree 4409 template <typename opT, typename R> do_apply_directed_screening(const opT * op,const keyT & key,const coeffT & coeff,const bool & do_kernel)4410 double do_apply_directed_screening(const opT* op, const keyT& key, const coeffT& coeff, 4411 const bool& do_kernel) { 4412 PROFILE_MEMBER_FUNC(FunctionImpl); 4413 // insert timer here 4414 typedef typename opT::keyT opkeyT; 4415 4416 // screening: contains all displacement keys that had small result norms 4417 std::list<opkeyT> blacklist; 4418 4419 static const size_t opdim=opT::opdim; 4420 Key<NDIM-opdim> nullkey(key.level()); 4421 4422 // source is that part of key that corresponds to those dimensions being processed 4423 const opkeyT source=op->get_source_key(key); 4424 4425 const double tol = truncate_tol(thresh, key); 4426 4427 // fac is the root of the number of contributing neighbors (1st shell) 4428 double fac=std::pow(3,NDIM*0.5); 4429 double cnorm = coeff.normf(); 4430 4431 // for accumulation: keep slightly tighter TensorArgs 4432 TensorArgs apply_targs(targs); 4433 apply_targs.thresh=tol/fac*0.03; 4434 4435 double maxnorm=0.0; 4436 4437 // for the kernel it may be more efficient to do 
the convolution in full rank 4438 tensorT coeff_full; 4439 // for partial application (exchange operator) it's more efficient to 4440 // do SVD tensors instead of tensortrains, because addition in apply 4441 // can be done in full form for the specific particle 4442 coeffT coeff_SVD=coeff.convert(TensorArgs(-1.0,TT_2D)); 4443 #ifdef USE_LRT 4444 coeff_SVD.impl.svd->orthonormalize(tol*LowRankTensor<T>::fac_reduce()); 4445 #endif 4446 4447 const std::vector<opkeyT>& disp = op->get_disp(key.level()); 4448 const std::vector<bool> is_periodic(NDIM,false); // Periodic sum is already done when making rnlp 4449 4450 for (typename std::vector<opkeyT>::const_iterator it=disp.begin(); it != disp.end(); ++it) { 4451 const opkeyT& d = *it; 4452 4453 const int shell=d.distsq(); 4454 if (do_kernel and (shell>0)) break; 4455 if ((not do_kernel) and (shell==0)) continue; 4456 4457 keyT disp1; 4458 if (op->particle()==1) disp1=it->merge_with(nullkey); 4459 else if (op->particle()==2) disp1=nullkey.merge_with(*it); 4460 else { 4461 MADNESS_EXCEPTION("confused particle in operato??",1); 4462 } 4463 4464 keyT dest = neighbor(key, disp1, is_periodic); 4465 4466 if (not dest.is_valid()) continue; 4467 4468 // directed screening 4469 // working assumption here is that the operator is isotropic and 4470 // monotonically decreasing with distance 4471 bool screened=false; 4472 typename std::list<opkeyT>::const_iterator it2; 4473 for (it2=blacklist.begin(); it2!=blacklist.end(); it2++) { 4474 if (d.is_farther_out_than(*it2)) { 4475 screened=true; 4476 break; 4477 } 4478 } 4479 if (not screened) { 4480 4481 double opnorm = op->norm(key.level(), d, source); 4482 double norm=0.0; 4483 4484 if (cnorm*opnorm> tol/fac) { 4485 4486 double cost_ratio=op->estimate_costs(source, d, coeff_SVD, tol/fac/cnorm, tol/fac); 4487 // cost_ratio=1.5; // force low rank 4488 // cost_ratio=0.5; // force full rank 4489 4490 if (cost_ratio>0.0) { 4491 4492 do_op_args<opdim> args(source, d, dest, tol, fac, cnorm); 
4493 norm=0.0; 4494 if (cost_ratio<1.0) { 4495 if (not coeff_full.has_data()) coeff_full=coeff.full_tensor_copy(); 4496 norm=do_apply_kernel2(op, coeff_full,args,apply_targs); 4497 } else { 4498 if (2*opdim==NDIM) { // apply operator on one particle only 4499 norm=do_apply_kernel3(op,coeff_SVD,args,apply_targs); 4500 } else { 4501 norm=do_apply_kernel3(op,coeff,args,apply_targs); 4502 } 4503 } 4504 maxnorm=std::max(norm,maxnorm); 4505 } 4506 4507 } else if (shell >= 12) { 4508 break; // Assumes monotonic decay beyond nearest neighbor 4509 } 4510 if (norm<0.3*tol/fac) blacklist.push_back(d); 4511 } 4512 } 4513 return maxnorm; 4514 } 4515 4516 4517 /// similar to apply, but for low rank coeffs 4518 template <typename opT, typename R> apply_source_driven(opT & op,const FunctionImpl<R,NDIM> & f,bool fence)4519 void apply_source_driven(opT& op, const FunctionImpl<R,NDIM>& f, bool fence) { 4520 PROFILE_MEMBER_FUNC(FunctionImpl); 4521 4522 MADNESS_ASSERT(not op.modified()); 4523 // looping through all the coefficients of the source f 4524 typename dcT::const_iterator end = f.get_coeffs().end(); 4525 for (typename dcT::const_iterator it=f.get_coeffs().begin(); it!=end; ++it) { 4526 4527 const keyT& key = it->first; 4528 const coeffT& coeff = it->second.coeff(); 4529 4530 if (coeff.has_data() and (coeff.rank()!=0)) { 4531 ProcessID p = FunctionDefaults<NDIM>::get_apply_randomize() ? 
world.random_proc() : coeffs.owner(key); 4532 woT::task(p, &implT:: template do_apply_directed_screening<opT,R>, &op, key, coeff, true); 4533 woT::task(p, &implT:: template do_apply_directed_screening<opT,R>, &op, key, coeff, false); 4534 } 4535 } 4536 if (fence) world.gop.fence(); 4537 } 4538 4539 /// after apply we need to do some cleanup; 4540 double finalize_apply(const bool fence=true); 4541 4542 /// traverse a non-existing tree, make its coeffs and apply an operator 4543 4544 /// invoked by result 4545 /// here we use the fact that the hi-dim NS coefficients on all scales are exactly 4546 /// the outer product of the underlying low-dim functions (also in NS form), 4547 /// so we don't need to construct the full hi-dim tree and then turn it into NS form. 4548 /// @param[in] apply_op the operator acting on the NS tree 4549 /// @param[in] fimpl the funcimpl of the function of particle 1 4550 /// @param[in] gimpl the funcimpl of the function of particle 2 4551 template<typename opT, std::size_t LDIM> recursive_apply(opT & apply_op,const FunctionImpl<T,LDIM> * fimpl,const FunctionImpl<T,LDIM> * gimpl,const bool fence)4552 void recursive_apply(opT& apply_op, const FunctionImpl<T,LDIM>* fimpl, 4553 const FunctionImpl<T,LDIM>* gimpl, const bool fence) { 4554 4555 //print("IN RECUR2"); 4556 const keyT& key0=cdata.key0; 4557 4558 if (world.rank() == coeffs.owner(key0)) { 4559 4560 CoeffTracker<T,LDIM> ff(fimpl); 4561 CoeffTracker<T,LDIM> gg(gimpl); 4562 4563 typedef recursive_apply_op<opT,LDIM> coeff_opT; 4564 coeff_opT coeff_op(this,ff,gg,&apply_op); 4565 4566 typedef noop<T,NDIM> apply_opT; 4567 apply_opT apply_op; 4568 4569 ProcessID p= coeffs.owner(key0); 4570 woT::task(p, &implT:: template forward_traverse<coeff_opT,apply_opT>, coeff_op, apply_op, key0); 4571 4572 } 4573 if (fence) world.gop.fence(); 4574 } 4575 4576 /// recursive part of recursive_apply 4577 template<typename opT, std::size_t LDIM> 4578 struct recursive_apply_op { randomizerecursive_apply_op4579 
bool randomize() const {return true;} 4580 4581 typedef recursive_apply_op<opT,LDIM> this_type; 4582 4583 implT* result; 4584 CoeffTracker<T,LDIM> iaf; 4585 CoeffTracker<T,LDIM> iag; 4586 opT* apply_op; 4587 4588 // ctor recursive_apply_oprecursive_apply_op4589 recursive_apply_op() {} recursive_apply_oprecursive_apply_op4590 recursive_apply_op(implT* result, 4591 const CoeffTracker<T,LDIM>& iaf, const CoeffTracker<T,LDIM>& iag, 4592 const opT* apply_op) : result(result), iaf(iaf), iag(iag), apply_op(apply_op) 4593 { 4594 MADNESS_ASSERT(LDIM+LDIM==NDIM); 4595 } recursive_apply_oprecursive_apply_op4596 recursive_apply_op(const recursive_apply_op& other) : result(other.result), iaf(other.iaf), 4597 iag(other.iag), apply_op(other.apply_op) {} 4598 4599 4600 /// make the NS-coefficients and send off the application of the operator 4601 4602 /// @return a Future<bool,coeffT>(is_leaf,coeffT()) operatorrecursive_apply_op4603 std::pair<bool,coeffT> operator()(const Key<NDIM>& key) const { 4604 4605 // World& world=result->world; 4606 // break key into particles (these are the child keys, with datum1/2 come the parent keys) 4607 Key<LDIM> key1,key2; 4608 key.break_apart(key1,key2); 4609 4610 // the lo-dim functions should be in full tensor form 4611 const tensorT fcoeff=iaf.coeff(key1).full_tensor(); 4612 const tensorT gcoeff=iag.coeff(key2).full_tensor(); 4613 4614 // would this be a leaf node? If so, then its sum coeffs have already been 4615 // processed by the parent node's wavelet coeffs. Therefore we won't 4616 // process it any more. 4617 hartree_leaf_op<T,NDIM> leaf_op(result,result->get_k()); 4618 bool is_leaf=leaf_op(key,fcoeff,gcoeff); 4619 4620 if (not is_leaf) { 4621 // new coeffs are simply the hartree/kronecker/outer product -- 4622 const std::vector<Slice>& s0=iaf.get_impl()->cdata.s0; 4623 const coeffT coeff = (apply_op->modified()) 4624 ? 
outer(copy(fcoeff(s0)),copy(gcoeff(s0)),result->targs) 4625 : outer(fcoeff,gcoeff,result->targs); 4626 4627 // now send off the application 4628 tensorT coeff_full; 4629 ProcessID p=result->world.rank(); 4630 double norm0=result->do_apply_directed_screening<opT,T>(apply_op, key, coeff, true); 4631 4632 result->task(p,&implT:: template do_apply_directed_screening<opT,T>, 4633 apply_op,key,coeff,false); 4634 4635 return finalize(norm0,key,coeff); 4636 4637 } else { 4638 return std::pair<bool,coeffT> (is_leaf,coeffT()); 4639 } 4640 } 4641 4642 /// sole purpose is to wait for the kernel norm, wrap it and send it back to caller finalizerecursive_apply_op4643 std::pair<bool,coeffT> finalize(const double kernel_norm, const keyT& key, 4644 const coeffT& coeff) const { 4645 const double thresh=result->get_thresh()*0.1; 4646 bool is_leaf=(kernel_norm<result->truncate_tol(thresh,key)); 4647 if (key.level()<2) is_leaf=false; 4648 return std::pair<bool,coeffT> (is_leaf,coeff); 4649 } 4650 4651 make_childrecursive_apply_op4652 this_type make_child(const keyT& child) const { 4653 4654 // break key into particles 4655 Key<LDIM> key1, key2; 4656 child.break_apart(key1,key2); 4657 4658 return this_type(result,iaf.make_child(key1),iag.make_child(key2),apply_op); 4659 } 4660 activaterecursive_apply_op4661 Future<this_type> activate() const { 4662 Future<CoeffTracker<T,LDIM> > f1=iaf.activate(); 4663 Future<CoeffTracker<T,LDIM> > g1=iag.activate(); 4664 return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this), 4665 &this_type::forward_ctor),result,f1,g1,apply_op); 4666 } 4667 forward_ctorrecursive_apply_op4668 this_type forward_ctor(implT* r, const CoeffTracker<T,LDIM>& f1, const CoeffTracker<T,LDIM>& g1, 4669 const opT* apply_op1) { 4670 return this_type(r,f1,g1,apply_op1); 4671 } 4672 serializerecursive_apply_op4673 template <typename Archive> void serialize(const Archive& ar) { 4674 ar & result & iaf & iag & apply_op; 4675 } 4676 }; 4677 4678 /// traverse 
an existing tree and apply an operator 4679 4680 /// invoked by result 4681 /// @param[in] apply_op the operator acting on the NS tree 4682 /// @param[in] fimpl the funcimpl of the source function 4683 /// @param[in] rimpl a dummy function for recursive_op to insert data 4684 template<typename opT> recursive_apply(opT & apply_op,const implT * fimpl,implT * rimpl,const bool fence)4685 void recursive_apply(opT& apply_op, const implT* fimpl, implT* rimpl, const bool fence) { 4686 4687 print("IN RECUR1"); 4688 4689 const keyT& key0=cdata.key0; 4690 4691 if (world.rank() == coeffs.owner(key0)) { 4692 4693 typedef recursive_apply_op2<opT> coeff_opT; 4694 coeff_opT coeff_op(this,fimpl,&apply_op); 4695 4696 typedef noop<T,NDIM> apply_opT; 4697 apply_opT apply_op; 4698 4699 woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>, 4700 coeff_op, apply_op, cdata.key0); 4701 4702 } 4703 if (fence) world.gop.fence(); 4704 } 4705 4706 /// recursive part of recursive_apply 4707 template<typename opT> 4708 struct recursive_apply_op2 { randomizerecursive_apply_op24709 bool randomize() const {return true;} 4710 4711 typedef recursive_apply_op2<opT> this_type; 4712 typedef CoeffTracker<T,NDIM> ctT; 4713 typedef std::pair<bool,coeffT> argT; 4714 4715 mutable implT* result; 4716 ctT iaf; /// need this for randomization 4717 const opT* apply_op; 4718 4719 // ctor recursive_apply_op2recursive_apply_op24720 recursive_apply_op2() {} recursive_apply_op2recursive_apply_op24721 recursive_apply_op2(implT* result, const ctT& iaf, const opT* apply_op) 4722 : result(result), iaf(iaf), apply_op(apply_op) {} 4723 recursive_apply_op2recursive_apply_op24724 recursive_apply_op2(const recursive_apply_op2& other) : result(other.result), 4725 iaf(other.iaf), apply_op(other.apply_op) {} 4726 4727 4728 /// send off the application of the operator 4729 4730 /// the first (core) neighbor (ie. 
the box itself) is processed 4731 /// immediately, all other ones are shoved into the taskq 4732 /// @return a pair<bool,coeffT>(is_leaf,coeffT()) operatorrecursive_apply_op24733 argT operator()(const Key<NDIM>& key) const { 4734 4735 const coeffT& coeff=iaf.coeff(); 4736 4737 if (coeff.has_data()) { 4738 4739 // now send off the application for all neighbor boxes 4740 ProcessID p=result->world.rank(); 4741 result->task(p,&implT:: template do_apply_directed_screening<opT,T>, 4742 apply_op, key, coeff, false); 4743 4744 // process the core box 4745 double norm0=result->do_apply_directed_screening<opT,T>(apply_op,key,coeff,true); 4746 4747 if (iaf.is_leaf()) return argT(true,coeff); 4748 return finalize(norm0,key,coeff,result); 4749 4750 } else { 4751 const bool is_leaf=true; 4752 return argT(is_leaf,coeffT()); 4753 } 4754 } 4755 4756 /// sole purpose is to wait for the kernel norm, wrap it and send it back to caller finalizerecursive_apply_op24757 argT finalize(const double kernel_norm, const keyT& key, 4758 const coeffT& coeff, const implT* r) const { 4759 const double thresh=r->get_thresh()*0.1; 4760 bool is_leaf=(kernel_norm<r->truncate_tol(thresh,key)); 4761 if (key.level()<2) is_leaf=false; 4762 return argT(is_leaf,coeff); 4763 } 4764 4765 make_childrecursive_apply_op24766 this_type make_child(const keyT& child) const { 4767 return this_type(result,iaf.make_child(child),apply_op); 4768 } 4769 4770 /// retrieve the coefficients (parent coeffs might be remote) activaterecursive_apply_op24771 Future<this_type> activate() const { 4772 Future<ctT> f1=iaf.activate(); 4773 4774 // Future<ctL> g1=g.activate(); 4775 // return h->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this), 4776 // &this_type::forward_ctor),h,f1,g1,particle); 4777 4778 return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this), 4779 &this_type::forward_ctor),result,f1,apply_op); 4780 } 4781 4782 /// taskq-compatible ctor forward_ctorrecursive_apply_op24783 
this_type forward_ctor(implT* result1, const ctT& iaf1, const opT* apply_op1) {
    return this_type(result1,iaf1,apply_op1);
}

/// serialize members for task transfer.  NOTE(review): apply_op is a
/// raw pointer -- presumably only valid within a single process; confirm.
template <typename Archive> void serialize(const Archive& ar) {
    ar & result & iaf & apply_op;
}
};

/// Returns the square of the error norm in the box labeled by key

/// Assumed to be invoked locally but it would be easy to eliminate
/// this assumption
/// @param[in] key       the box over which the error is estimated
/// @param[in] node      tree node holding this box's coefficients
/// @param[in] func      the "exact" function to compare against
/// @param[in] npt       number of quadrature points (usually this->npt+1)
/// @param[in] qx        quadrature points for order npt
/// @param[in] quad_phit transposed scaling functions at the quadrature points
///                      NOTE(review): unused in this body -- kept for interface symmetry?
/// @param[in] quad_phiw weighted scaling functions at the quadrature points
/// @return the squared L2 error in this box
template <typename opT>
double err_box(const keyT& key, const nodeT& node, const opT& func,
               int npt, const Tensor<double>& qx, const Tensor<double>& quad_phit,
               const Tensor<double>& quad_phiw) const {

    std::vector<long> vq(NDIM);
    for (std::size_t i=0; i<NDIM; ++i)
        vq[i] = npt;
    tensorT fval(vq,false), work(vq,false), result(vq,false);

    // Compute the "exact" function in this volume at npt points
    // where npt is usually this->npt+1.
    fcube(key, func, qx, fval);

    // Transform into the scaling function basis of order npt
    double scale = pow(0.5,0.5*NDIM*key.level())*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
    fval = fast_transform(fval,quad_phiw,result,work).scale(scale);

    // Subtract to get the error ... the original coeffs are in the order k
    // basis but we just computed the coeffs in the order npt(=k+1) basis
    // so we can either use slices or an iterator macro.
    const tensorT coeff = node.coeff().full_tensor_copy();
    ITERATOR(coeff,fval(IND)-=coeff(IND););
    // flo note: we do want to keep a full tensor here!

    // Compute the norm of what remains
    double err = fval.normf();
    return err*err;
}

/// functor for the taskq reduction in errsq_local: maps a tree node to
/// its squared error against func and reduces by summation
template <typename opT>
class do_err_box {
    const implT* impl;          ///< the function whose error is estimated
    const opT* func;            ///< the "exact" reference function
    int npt;                    ///< number of quadrature points
    Tensor<double> qx;          ///< quadrature points
    Tensor<double> quad_phit;   ///< transposed scaling functions at qx
    Tensor<double> quad_phiw;   ///< weighted scaling functions at qx
public:
    do_err_box() {}

    do_err_box(const implT* impl, const opT* func, int npt, const Tensor<double>& qx,
               const Tensor<double>& quad_phit, const Tensor<double>& quad_phiw)
        : impl(impl), func(func), npt(npt), qx(qx), quad_phit(quad_phit), quad_phiw(quad_phiw) {}

    do_err_box(const do_err_box& e)
        : impl(e.impl), func(e.func), npt(e.npt), qx(e.qx), quad_phit(e.quad_phit), quad_phiw(e.quad_phiw) {}

    /// squared error of one node (zero if it holds no coefficients)
    double operator()(typename dcT::const_iterator& it) const {
        const keyT& key = it->first;
        const nodeT& node = it->second;
        if (node.has_coeff())
            return impl->err_box(key, node, *func, npt, qx, quad_phit, quad_phiw);
        else
            return 0.0;
    }

    /// reduction operation: accumulate the squared errors
    double operator()(double a, double b) const {
        return a+b;
    }

    /// never called -- required by the taskq reduction interface only
    template <typename Archive>
    void serialize(const Archive& ar) {
        throw "not yet";
    }
};

/// Returns the sum of squares of errors from local info ...
no comms 4864 template <typename opT> errsq_local(const opT & func)4865 double errsq_local(const opT& func) const { 4866 PROFILE_MEMBER_FUNC(FunctionImpl); 4867 // Make quadrature rule of higher order 4868 const int npt = cdata.npt + 1; 4869 Tensor<double> qx, qw, quad_phi, quad_phiw, quad_phit; 4870 FunctionCommonData<T,NDIM>::_init_quadrature(k+1, npt, qx, qw, quad_phi, quad_phiw, quad_phit); 4871 4872 typedef Range<typename dcT::const_iterator> rangeT; 4873 rangeT range(coeffs.begin(), coeffs.end()); 4874 return world.taskq.reduce< double,rangeT,do_err_box<opT> >(range, 4875 do_err_box<opT>(this, &func, npt, qx, quad_phit, quad_phiw)); 4876 } 4877 4878 /// Returns \c int(f(x),x) in local volume 4879 T trace_local() const; 4880 4881 struct do_norm2sq_local { operatordo_norm2sq_local4882 double operator()(typename dcT::const_iterator& it) const { 4883 const nodeT& node = it->second; 4884 if (node.has_coeff()) { 4885 double norm = node.coeff().normf(); 4886 return norm*norm; 4887 } 4888 else { 4889 return 0.0; 4890 } 4891 } 4892 operatordo_norm2sq_local4893 double operator()(double a, double b) const { 4894 return (a+b); 4895 } 4896 serializedo_norm2sq_local4897 template <typename Archive> void serialize(const Archive& ar) { 4898 throw "NOT IMPLEMENTED"; 4899 } 4900 }; 4901 4902 4903 /// Returns the square of the local norm ... 
/// no comms
double norm2sq_local() const;

/// compute the inner product of this range with other
template<typename R>
struct do_inner_local {
    const FunctionImpl<R,NDIM>* other;  ///< the ket function g
    bool leaves_only;                   ///< count only pairs where at least one node is a leaf (redundant trees)
    typedef TENSOR_RESULT_TYPE(T,R) resultT;

    do_inner_local(const FunctionImpl<R,NDIM>* other, const bool leaves_only)
        : other(other), leaves_only(leaves_only) {}

    /// contribution of one local node of f: <f|g> over this box,
    /// zero if either function has no coefficients here
    resultT operator()(typename dcT::const_iterator& it) const {

        TENSOR_RESULT_TYPE(T,R) sum=0.0;
        const keyT& key=it->first;
        const nodeT& fnode = it->second;
        if (fnode.has_coeff()) {
            // identical distribution is assumed, so g's node (if any) is local
            if (other->coeffs.probe(it->first)) {
                const FunctionNode<R,NDIM>& gnode = other->coeffs.find(key).get()->second;
                if (gnode.has_coeff()) {
                    if (gnode.coeff().dim(0) != fnode.coeff().dim(0)) {
                        madness::print("INNER", it->first, gnode.coeff().dim(0),fnode.coeff().dim(0));
                        MADNESS_EXCEPTION("functions have different k or compress/reconstruct error", 0);
                    }
                    // in redundant form interior nodes duplicate information
                    // held by their children: count a pair only if at least
                    // one of the two nodes is a leaf
                    if (leaves_only) {
                        if (gnode.is_leaf() or fnode.is_leaf()) {
                            sum += fnode.coeff().trace_conj(gnode.coeff());
                        }
                    } else {
                        sum += fnode.coeff().trace_conj(gnode.coeff());
                    }
                }
            }
        }
        return sum;
    }

    /// reduction operation: summation
    resultT operator()(resultT a, resultT b) const {
        return (a+b);
    }

    /// never called -- required by the taskq reduction interface only
    template <typename Archive> void serialize(const Archive& ar) {
        throw "NOT IMPLEMENTED";
    }
};

/// Returns the inner product ASSUMING same distribution

/// handles compressed and redundant form
template <typename R>
TENSOR_RESULT_TYPE(T,R) inner_local(const FunctionImpl<R,NDIM>& g) const {
    PROFILE_MEMBER_FUNC(FunctionImpl);
    typedef Range<typename dcT::const_iterator> rangeT;
    typedef TENSOR_RESULT_TYPE(T,R) resultT;

    // make sure the states of the trees are consistent
    MADNESS_ASSERT(this->is_redundant()==g.is_redundant());
    bool leaves_only=(this->is_redundant());
    return world.taskq.reduce<resultT,rangeT,do_inner_local<R> >
        (rangeT(coeffs.begin(),coeffs.end()),do_inner_local<R>(&g, leaves_only));
}

/// Type of the entry in the map returned by make_key_vec_map
typedef std::vector< std::pair<int,const coeffT*> > mapvecT;

/// Type of the map returned by make_key_vec_map
typedef ConcurrentHashMap< keyT, mapvecT > mapT;

/// Adds keys to union of local keys with specified index

/// @param[in,out] map    shared key -> (index, coeff*) map being built
/// @param[in]     index  position of this function in the caller's vector
void add_keys_to_map(mapT* map, int index) const {
    typename dcT::const_iterator end = coeffs.end();
    for (typename dcT::const_iterator it=coeffs.begin(); it!=end; ++it) {
        typename mapT::accessor acc;
        const keyT& key = it->first;
        const FunctionNode<T,NDIM>& node = it->second;
        if (node.has_coeff()) {
            // insert-or-find; the accessor holds a lock so concurrent
            // tasks can safely append to the same key's vector
            map->insert(acc,key);
            acc->second.push_back(std::make_pair(index,&(node.coeff())));
        }
    }
}

/// Returns map of union of local keys to vector of indexes of functions containing that key

/// Local concurrency and synchronization only; no communication
static
mapT
make_key_vec_map(const std::vector<const FunctionImpl<T,NDIM>*>& v) {
    mapT map(100000);   // initial bucket count, sized for large trees
    // This loop must be parallelized
    for (unsigned int i=0; i<v.size(); i++) {
        //v[i]->add_keys_to_map(&map,i);
        v[i]->world.taskq.add(*(v[i]), &FunctionImpl<T,NDIM>::add_keys_to_map, &map, int(i));
    }
    if (v.size()) v[0]->world.taskq.fence();
    return map;
}

/// worker for the static inner_local below: processes the chunk
/// [lstart,lend) of the left key map, accumulating all pairwise box
/// inner products into *result_ptr (merged under the mutex)
template <typename R>
static void
do_inner_localX(const typename mapT::iterator lstart,
                const typename mapT::iterator lend,
                typename FunctionImpl<R,NDIM>::mapT* rmap_ptr,
                const bool sym,
                Tensor< TENSOR_RESULT_TYPE(T,R) >* result_ptr,
                Mutex* mutex) {
    Tensor< TENSOR_RESULT_TYPE(T,R) >& result = *result_ptr;
    // accumulate into a task-local tensor; merge once at the end
    Tensor< TENSOR_RESULT_TYPE(T,R) > r(result.dim(0),result.dim(1));
    for (typename mapT::iterator lit=lstart; lit!=lend; ++lit) {
        const keyT& key = lit->first;
        typename FunctionImpl<R,NDIM>::mapT::iterator rit=rmap_ptr->find(key);
        if (rit != rmap_ptr->end()) {
            const mapvecT& leftv = lit->second;
            const typename FunctionImpl<R,NDIM>::mapvecT& rightv =rit->second;
            const int nleft = leftv.size();
            const int nright= rightv.size();

            for (int iv=0; iv<nleft; iv++) {
                const int i = leftv[iv].first;
                const GenTensor<T>* iptr = leftv[iv].second;

                for (int jv=0; jv<nright; jv++) {
                    const int j = rightv[jv].first;
                    const GenTensor<R>* jptr = rightv[jv].second;

                    // for symmetric products only i<=j is computed here;
                    // the lower triangle is filled in by the caller
                    if (!sym || (sym && i<=j))
                        r(i,j) += iptr->trace_conj(*jptr);
                }
            }
        }
    }
    mutex->lock();
    result += r;
    mutex->unlock();
}

/// real/complex overloads so the symmetrization below compiles for all T
static double conj(double x) {
    return x;
}

static double conj(float x) {
    return x;
}

static std::complex<double> conj(const std::complex<double> x) {
    return std::conj(x);
}

/// Returns the matrix of local inner products <left[i]|right[j]>

/// @param[in] left   bra functions
/// @param[in] right  ket functions (may be the same vector as left)
/// @param[in] sym    if true assume the result is hermitian: only i<=j
///                   is computed, the rest is mirrored afterwards
template <typename R>
static Tensor< TENSOR_RESULT_TYPE(T,R) >
inner_local(const std::vector<const FunctionImpl<T,NDIM>*>& left,
            const std::vector<const FunctionImpl<R,NDIM>*>& right,
            bool sym) {

    // This is basically a sparse matrix^T * matrix product
    // Rij = sum(k) Aki * Bkj
    // where i and j index functions and k index the wavelet coeffs
    // eventually the goal is this structure (don't have jtile yet)
    //
    // do in parallel tiles of k (tensors of coeffs)
    //    do tiles of j
    //       do i
    //          do j in jtile
    //             do k in ktile
    //                Rij += Aki*Bkj

    mapT lmap = make_key_vec_map(left);
    typename FunctionImpl<R,NDIM>::mapT rmap;
    // alias the left map when left and right are the same vector
    // (the cast is only exercised in that case, where T==R)
    typename FunctionImpl<R,NDIM>::mapT* rmap_ptr = (typename FunctionImpl<R,NDIM>::mapT*)(&lmap);
    if ((std::vector<const FunctionImpl<R,NDIM>*>*)(&left) != &right) {
        rmap = FunctionImpl<R,NDIM>::make_key_vec_map(right);
        rmap_ptr = &rmap;
    }

    // split the left map into ~60 chunks processed as parallel tasks
    size_t chunk = (lmap.size()-1)/(3*4*5)+1;

    Tensor< TENSOR_RESULT_TYPE(T,R) > r(left.size(), right.size());
    Mutex mutex;

    typename mapT::iterator lstart=lmap.begin();
    while (lstart != lmap.end()) {
        typename mapT::iterator lend = lstart;
        advance(lend,chunk);
        left[0]->world.taskq.add(&FunctionImpl<T,NDIM>::do_inner_localX<R>, lstart, lend, rmap_ptr, sym, &r, &mutex);
        lstart = lend;
    }
    left[0]->world.taskq.fence();

    // fill in the lower triangle from the computed upper triangle
    if (sym) {
        for (long i=0; i<r.dim(0); i++) {
            for (long j=0; j<i; j++) {
                TENSOR_RESULT_TYPE(T,R) sum = r(i,j)+conj(r(j,i));
                r(i,j) = sum;
                r(j,i) = conj(sum);
            }
        }
    }
    return r;
}

/// Return the inner product with an external function on a specified function node.

/// @param[in] key Key of the function node to compute the inner product on. (the domain of integration)
/// @param[in] c Tensor of coefficients for the function at the function node given by key
/// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
/// @return Returns the inner product over the domain of a single function node, no guarantee of accuracy.
T inner_ext_node(keyT key, tensorT c, const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f) const {
    // scratch tensor for function values at the quadrature points
    tensorT fvals = tensorT(this->cdata.vk);
    // Compute the value of the external function at the quadrature points.
    fcube(key, *(f), cdata.quad_x, fvals);
    // Convert quadrature point values to scaling coefficients.
    tensorT fc = tensorT(values2coeffs(key, fvals));
    // Return the inner product of the two functions' scaling coefficients.
    return c.trace_conj(fc);
}

/// Call inner_ext_node recursively until convergence.
/// @param[in] key Key of the function node on which to compute inner product (the domain of integration)
/// @param[in] c coeffs for the function at the node given by key
/// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function
/// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes
/// @param[in] old_inner the inner product on the parent function node
/// @return Returns the inner product over the domain of a single function, checks for convergence.
T inner_ext_recursive(keyT key, tensorT c, const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, const bool leaf_refine, T old_inner=T(0)) const {
    int i = 0;
    tensorT c_child, inner_child;
    T new_inner, result = 0.0;

    c_child = tensorT(cdata.v2k); // tensor of child coeffs
    inner_child = Tensor<double>(pow(2, NDIM)); // child inner products

    // If old_inner is default value, assume this is the first call
    // and compute inner product on this node.
    // NOTE(review): a genuine zero inner product is indistinguishable
    // from the "first call" sentinel and is recomputed here -- harmless
    // but worth knowing.
    if (old_inner == T(0)) {
        old_inner = inner_ext_node(key, c, f);
    }

    if (coeffs.find(key).get()->second.has_children()) {
        // Since the key has children and we know the func is redundant,
        // Iterate over all children of this compute node, computing
        // the inner product on each child node. new_inner will store
        // the sum of these, yielding a more accurate inner product.
        // NOTE(review): c_child is not filled in this branch; the
        // recursion below then passes zero coefficient tensors down --
        // presumably intentional because interior nodes fetch their
        // coefficients from the tree, but confirm.
        for (KeyChildIterator<NDIM> it(key); it; ++it, ++i) {
            const keyT& child = it.key();
            tensorT cc = coeffs.find(child).get()->second.coeff().full_tensor_copy();
            inner_child(i) = inner_ext_node(child, cc, f);
        }
        new_inner = inner_child.sum();
    } else if (leaf_refine) {
        // We need the scaling coefficients of the numerical function
        // at each of the children nodes. We can't use project because
        // there is no guarantee that the numerical function will have
        // a functor. Instead, since we know we are at or below the
        // leaf nodes, the wavelet coefficients are zero (to within the
        // truncate tolerance). Thus, we can use unfilter() to
        // get the scaling coefficients at the next level.
        tensorT d = tensorT(cdata.v2k);
        d = T(0);
        d(cdata.s0) = copy(c);
        c_child = unfilter(d);

        // Iterate over all children of this compute node, computing
        // the inner product on each child node. new_inner will store
        // the sum of these, yielding a more accurate inner product.
        for (KeyChildIterator<NDIM> it(key); it; ++it, ++i) {
            const keyT& child = it.key();
            tensorT cc = tensorT(c_child(child_patch(child)));
            inner_child(i) = inner_ext_node(child, cc, f);
        }
        new_inner = inner_child.sum();
    } else {
        // If we get to here, we are at the leaf nodes and the user has
        // specified that they do not want refinement past leaf nodes.
        new_inner = old_inner;
    }

    // Check for convergence. If converged...yay, we're done. If not,
    // call inner_ext_node_recursive on each child node and accumulate
    // the inner product in result.
    // if (std::abs(new_inner - old_inner) <= truncate_tol(thresh, key)) {
    if (std::abs(new_inner - old_inner) <= thresh) {
        result = new_inner;
    } else {
        i = 0;
        for (KeyChildIterator<NDIM> it(key); it; ++it, ++i) {
            const keyT& child = it.key();
            tensorT cc = tensorT(c_child(child_patch(child)));
            result += inner_ext_recursive(child, cc, f, leaf_refine, inner_child(i));
        }
    }

    return result;
}

/// functor for the taskq reduction in inner_ext_local/inner_adaptive_local
struct do_inner_ext_local_ffi {
    const std::shared_ptr< FunctionFunctorInterface<T, NDIM> > fref;  ///< the external function
    const implT * impl;       ///< the MRA function
    const bool leaf_refine;   ///< refine past leaf nodes for accuracy
    const bool do_leaves;     ///< start with leaf nodes instead of initial_level

    do_inner_ext_local_ffi(const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f,
                           const implT * impl, const bool leaf_refine, const bool do_leaves)
        : fref(f), impl(impl), leaf_refine(leaf_refine), do_leaves(do_leaves) {};

    /// contribution of one node: recursion starts either from the leaf
    /// nodes (do_leaves) or from the initial level; all other nodes
    /// contribute zero so each part of the domain is counted exactly once
    T operator()(typename dcT::const_iterator& it) const {
        if (do_leaves and it->second.is_leaf()) {
            tensorT cc = it->second.coeff().full_tensor();
            return impl->inner_adaptive_recursive(it->first, cc, fref, leaf_refine, T(0));
        } else if ((not do_leaves) and (it->first.level() == impl->initial_level)) {
            tensorT cc = it->second.coeff().full_tensor();
            return impl->inner_ext_recursive(it->first, cc, fref, leaf_refine, T(0));
        } else {
            return 0.0;
        }
    }

    /// reduction operation: summation
    T operator()(T a, T b) const {
        return (a + b);
    }

    /// never called -- required by the taskq reduction interface only
    template <typename Archive> void serialize(const Archive& ar) {
        throw "NOT IMPLEMENTED";
    }
};

/// Return the local part of inner product with external function ...
5229 /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function 5230 /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes 5231 /// @return Returns local part of the inner product, i.e. over the domain of all function nodes on this compute node. inner_ext_local(const std::shared_ptr<FunctionFunctorInterface<T,NDIM>> f,const bool leaf_refine)5232 T inner_ext_local(const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, const bool leaf_refine) const { 5233 typedef Range<typename dcT::const_iterator> rangeT; 5234 5235 return world.taskq.reduce<T, rangeT, do_inner_ext_local_ffi>(rangeT(coeffs.begin(),coeffs.end()), 5236 do_inner_ext_local_ffi(f, this, leaf_refine, false)); 5237 } 5238 5239 /// Return the local part of inner product with external function ... no communication. 5240 /// @param[in] f Reference to FunctionFunctorInterface. This is the externally provided function 5241 /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes 5242 /// @return Returns local part of the inner product, i.e. over the domain of all function nodes on this compute node. inner_adaptive_local(const std::shared_ptr<FunctionFunctorInterface<T,NDIM>> f,const bool leaf_refine)5243 T inner_adaptive_local(const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, const bool leaf_refine) const { 5244 typedef Range<typename dcT::const_iterator> rangeT; 5245 5246 return world.taskq.reduce<T, rangeT, do_inner_ext_local_ffi>(rangeT(coeffs.begin(),coeffs.end()), 5247 do_inner_ext_local_ffi(f, this, leaf_refine, true)); 5248 } 5249 5250 /// Call inner_ext_node recursively until convergence. 5251 /// @param[in] key Key of the function node on which to compute inner product (the domain of integration) 5252 /// @param[in] c coeffs for the function at the node given by key 5253 /// @param[in] f Reference to FunctionFunctorInterface. 
This is the externally provided function 5254 /// @param[in] leaf_refine boolean switch to turn on/off refinement past leaf nodes 5255 /// @param[in] old_inner the inner product on the parent function node 5256 /// @return Returns the inner product over the domain of a single function, checks for convergence. 5257 T inner_adaptive_recursive(keyT key, const tensorT& c, 5258 const std::shared_ptr< FunctionFunctorInterface<T,NDIM> > f, 5259 const bool leaf_refine, T old_inner=T(0)) const { 5260 5261 // the inner product in the current node 5262 old_inner = inner_ext_node(key, c, f); 5263 T result=0.0; 5264 5265 // the inner product in the child nodes 5266 5267 // compute the sum coefficients of the MRA function 5268 tensorT d = tensorT(cdata.v2k); 5269 d = T(0); 5270 d(cdata.s0) = copy(c); 5271 tensorT c_child = unfilter(d); 5272 5273 // compute the inner product in the child nodes 5274 T new_inner=0.0; // child inner products 5275 for (KeyChildIterator<NDIM> it(key); it; ++it) { 5276 const keyT& child = it.key(); 5277 tensorT cc = tensorT(c_child(child_patch(child))); 5278 new_inner+= inner_ext_node(child, cc, f); 5279 } 5280 5281 // continue recursion if needed 5282 const double tol=truncate_tol(thresh,key); 5283 if (leaf_refine and (std::abs(new_inner - old_inner) > tol)) { 5284 for (KeyChildIterator<NDIM> it(key); it; ++it) { 5285 const keyT& child = it.key(); 5286 tensorT cc = tensorT(c_child(child_patch(child))); 5287 result += inner_adaptive_recursive(child, cc, f, leaf_refine, T(0)); 5288 } 5289 } else { 5290 result = new_inner; 5291 } 5292 return result; 5293 5294 } 5295 5296 5297 /// Return the gaxpy product with an external function on a specified 5298 /// function node. 5299 /// @param[in] key Key of the function node on which to compute gaxpy 5300 /// @param[in] lc Tensor of coefficients for the function at the 5301 /// function node given by key 5302 /// @param[in] f Pointer to function of type T that takes coordT 5303 /// arguments. 
/// arguments. This is the externally provided function and
/// the right argument of gaxpy.
/// @param[in] alpha prefactor of c Tensor for gaxpy
/// @param[in] beta prefactor of fcoeffs for gaxpy
/// @return Returns coefficient tensor of the gaxpy product at specified
/// key, no guarantee of accuracy.
template <typename L>
tensorT gaxpy_ext_node(keyT key, Tensor<L> lc, T (*f)(const coordT&), T alpha, T beta) const {
    // Compute the value of external function at the quadrature points.
    tensorT fvals = madness::fcube(key, f, cdata.quad_x);
    // Convert quadrature point values to scaling coefficients.
    tensorT fcoeffs = values2coeffs(key, fvals);
    // Return the inner product of the two functions' scaling coeffs.
    tensorT c2 = copy(lc);
    c2.gaxpy(alpha, fcoeffs, beta);
    return c2;
}

/// Return out of place gaxpy using recursive descent.
/// @param[in] key Key of the function node on which to compute gaxpy
/// @param[in] left FunctionImpl, left argument of gaxpy
/// @param[in] lcin coefficients of left at this node
/// @param[in] c coefficients of gaxpy product at this node
/// @param[in] f pointer to function of type T that takes coordT
///            arguments. This is the externally provided function and
///            the right argument of gaxpy.
/// @param[in] alpha prefactor of left argument for gaxpy
/// @param[in] beta prefactor of right argument for gaxpy
/// @param[in] tol convergence tolerance...when the norm of the gaxpy's
///            difference coefficients is less than tol, we are done.
template <typename L>
void gaxpy_ext_recursive(const keyT& key, const FunctionImpl<L,NDIM>* left,
                         Tensor<L> lcin, tensorT c, T (*f)(const coordT&),
                         T alpha, T beta, double tol, bool below_leaf) {
    typedef typename FunctionImpl<L,NDIM>::dcT::const_iterator literT;

    // If we haven't yet reached the leaf level, check whether the
    // current key is a leaf node of left. If so, set below_leaf to true
    // and continue. If not, make this a parent, recur down, return.
    if (not below_leaf) {
        bool left_leaf = left->coeffs.find(key).get()->second.is_leaf();
        if (left_leaf) {
            below_leaf = true;
        } else {
            // interior node: empty coeffs, has children
            this->coeffs.replace(key, nodeT(coeffT(), true));
            for (KeyChildIterator<NDIM> it(key); it; ++it) {
                const keyT& child = it.key();
                // spawn the child task on the rank owning left's child node
                woT::task(left->coeffs.owner(child), &implT:: template gaxpy_ext_recursive<L>,
                          child, left, Tensor<L>(), tensorT(), f, alpha, beta, tol, below_leaf);
            }
            return;
        }
    }

    // Compute left's coefficients if not provided
    Tensor<L> lc = lcin;
    if (lc.size() == 0) {
        literT it = left->coeffs.find(key).get();
        MADNESS_ASSERT(it != left->coeffs.end());
        if (it->second.has_coeff())
            lc = it->second.coeff().full_tensor_copy();
    }

    // Compute this node's coefficients if not provided in function call
    if (c.size() == 0) {
        c = gaxpy_ext_node(key, lc, f, alpha, beta);
    }

    // We need the scaling coefficients of the numerical function at
    // each of the children nodes. We can't use project because there
    // is no guarantee that the numerical function will have a functor.
    // Instead, since we know we are at or below the leaf nodes, the
    // wavelet coefficients are zero (to within the truncate tolerance).
    // Thus, we can use unfilter() to get the scaling coefficients at
    // the next level.
    Tensor<L> lc_child = Tensor<L>(cdata.v2k); // left's child coeffs
    Tensor<L> ld = Tensor<L>(cdata.v2k);
    ld = L(0);
    ld(cdata.s0) = copy(lc);
    lc_child = unfilter(ld);

    // Iterate over children of this node,
    // storing the gaxpy coeffs in c_child
    tensorT c_child = tensorT(cdata.v2k); // tensor of child coeffs
    for (KeyChildIterator<NDIM> it(key); it; ++it) {
        const keyT& child = it.key();
        tensorT lcoeff = tensorT(lc_child(child_patch(child)));
        c_child(child_patch(child)) = gaxpy_ext_node(child, lcoeff, f, alpha, beta);
    }

    // Compute the difference coefficients to test for convergence.
    tensorT d = tensorT(cdata.v2k);
    d = filter(c_child);
    // Filter returns both s and d coefficients, so set scaling
    // coefficient part of d to 0 so that we take only the
    // norm of the difference coefficients.
    d(cdata.s0) = T(0);
    double dnorm = d.normf();

    // Small d.normf means we've reached a good level of resolution
    // Store the coefficients and return.
    if (dnorm <= truncate_tol(tol,key)) {
        this->coeffs.replace(key, nodeT(coeffT(c,targs), false));
    } else {
        // Otherwise, make this a parent node and recur down
        this->coeffs.replace(key, nodeT(coeffT(), true)); // Interior node

        for (KeyChildIterator<NDIM> it(key); it; ++it) {
            const keyT& child = it.key();
            tensorT child_coeff = tensorT(c_child(child_patch(child)));
            tensorT left_coeff = tensorT(lc_child(child_patch(child)));
            woT::task(left->coeffs.owner(child), &implT:: template gaxpy_ext_recursive<L>,
                      child, left, left_coeff, child_coeff, f, alpha, beta, tol, below_leaf);
        }
    }
}

/// driver: start the recursive gaxpy at the root and optionally fence

/// @param[in] left   left argument of the gaxpy
/// @param[in] f      external function, right argument of the gaxpy
/// @param[in] alpha  prefactor of left
/// @param[in] beta   prefactor of f
/// @param[in] tol    convergence tolerance for the recursion
/// @param[in] fence  if true, wait for global completion before returning
template <typename L>
void gaxpy_ext(const FunctionImpl<L,NDIM>* left, T (*f)(const coordT&), T alpha, T beta, double tol, bool fence) {
    if (world.rank() == coeffs.owner(cdata.key0))
        gaxpy_ext_recursive<L> (cdata.key0, left, Tensor<L>(), tensorT(), f, alpha, beta, tol, false);
    if (fence)
        world.gop.fence();
}

/// project the low-dim function g on the hi-dim function f: result(x) = <this(x,y) | g(y)>

/// invoked by the hi-dim function, a function of NDIM+LDIM

/// Upon return, result matches this, with contributions on all scales
/// @param[in] result lo-dim function of NDIM-LDIM \todo Should this be param[out]?
5434 /// @param[in] gimpl lo-dim function of LDIM 5435 /// @param[in] dim over which dimensions to be integrated: 0..LDIM or LDIM..LDIM+NDIM-1 5436 template<size_t LDIM> project_out(FunctionImpl<T,NDIM-LDIM> * result,const FunctionImpl<T,LDIM> * gimpl,const int dim,const bool fence)5437 void project_out(FunctionImpl<T,NDIM-LDIM>* result, const FunctionImpl<T,LDIM>* gimpl, 5438 const int dim, const bool fence) { 5439 5440 const keyT& key0=cdata.key0; 5441 5442 if (world.rank() == coeffs.owner(key0)) { 5443 5444 // coeff_op will accumulate the result 5445 typedef project_out_op<LDIM> coeff_opT; 5446 coeff_opT coeff_op(this,result,CoeffTracker<T,LDIM>(gimpl),dim); 5447 5448 // don't do anything on this -- coeff_op will accumulate into result 5449 typedef noop<T,NDIM> apply_opT; 5450 apply_opT apply_op; 5451 5452 woT::task(world.rank(), &implT:: template forward_traverse<coeff_opT,apply_opT>, 5453 coeff_op, apply_op, cdata.key0); 5454 5455 } 5456 if (fence) world.gop.fence(); 5457 5458 } 5459 5460 5461 /// project the low-dim function g on the hi-dim function f: result(x) = <f(x,y) | g(y)> 5462 template<size_t LDIM> 5463 struct project_out_op { randomizeproject_out_op5464 bool randomize() const {return false;} 5465 5466 typedef project_out_op<LDIM> this_type; 5467 typedef CoeffTracker<T,LDIM> ctL; 5468 typedef FunctionImpl<T,NDIM-LDIM> implL1; 5469 typedef std::pair<bool,coeffT> argT; 5470 5471 const implT* fimpl; ///< the hi dim function f 5472 mutable implL1* result; ///< the low dim result function 5473 ctL iag; ///< the low dim function g 5474 int dim; ///< 0: project 0..LDIM-1, 1: project LDIM..NDIM-1 5475 5476 // ctor project_out_opproject_out_op5477 project_out_op() {} project_out_opproject_out_op5478 project_out_op(const implT* fimpl, implL1* result, const ctL& iag, const int dim) 5479 : fimpl(fimpl), result(result), iag(iag), dim(dim) {} project_out_opproject_out_op5480 project_out_op(const project_out_op& other) 5481 : fimpl(other.fimpl), 
result(other.result), iag(other.iag), dim(other.dim) {} 5482 5483 5484 /// do the actual contraction operatorproject_out_op5485 Future<argT> operator()(const Key<NDIM>& key) const { 5486 5487 Key<LDIM> key1,key2,dest; 5488 key.break_apart(key1,key2); 5489 5490 // make the right coefficients 5491 coeffT gcoeff; 5492 if (dim==0) { 5493 gcoeff=iag.get_impl()->parent_to_child(iag.coeff(),iag.key(),key1); 5494 dest=key2; 5495 } 5496 if (dim==1) { 5497 gcoeff=iag.get_impl()->parent_to_child(iag.coeff(),iag.key(),key2); 5498 dest=key1; 5499 } 5500 5501 MADNESS_ASSERT(fimpl->get_coeffs().probe(key)); // must be local! 5502 const nodeT& fnode=fimpl->get_coeffs().find(key).get()->second; 5503 const coeffT& fcoeff=fnode.coeff(); 5504 5505 // fast return if possible 5506 if (fcoeff.has_no_data() or gcoeff.has_no_data()) 5507 return Future<argT> (argT(fnode.is_leaf(),coeffT()));; 5508 5509 // let's specialize for the time being on SVD tensors for f and full tensors of half dim for g 5510 MADNESS_ASSERT(gcoeff.tensor_type()==TT_FULL); 5511 MADNESS_ASSERT(fcoeff.tensor_type()==TT_2D); 5512 const tensorT gtensor=gcoeff.full_tensor(); 5513 tensorT final(result->cdata.vk); 5514 5515 const int otherdim=(dim+1)%2; 5516 const int k=fcoeff.dim(0); 5517 std::vector<Slice> s(fcoeff.config().dim_per_vector()+1,_); 5518 5519 // do the actual contraction 5520 for (int r=0; r<fcoeff.rank(); ++r) { 5521 s[0]=Slice(r,r); 5522 const tensorT contracted_tensor=fcoeff.config().ref_vector(dim)(s).reshape(k,k,k); 5523 const tensorT other_tensor=fcoeff.config().ref_vector(otherdim)(s).reshape(k,k,k); 5524 const double ovlp= gtensor.trace_conj(contracted_tensor); 5525 const double fac=ovlp * fcoeff.config().weights(r); 5526 final+=fac*other_tensor; 5527 } 5528 5529 // accumulate the result 5530 result->coeffs.task(dest, &FunctionNode<T,LDIM>::accumulate2, final, result->coeffs, dest, TaskAttributes::hipri()); 5531 5532 return Future<argT> (argT(fnode.is_leaf(),coeffT())); 5533 } 5534 
            /// descend to a child key on the result side; the g quantities follow the matching half
            this_type make_child(const keyT& child) const {
                Key<LDIM> key1,key2;
                child.break_apart(key1,key2);
                // pick the half of the child key that corresponds to g's dimensions
                const Key<LDIM> gkey = (dim==0) ? key1 : key2;

                return this_type(fimpl,result,iag.make_child(gkey),dim);
            }

            /// retrieve the coefficients (parent coeffs might be remote)

            /// activates the g-side coeffs first, then rebuilds this op in a task
            /// once they are available
            Future<this_type> activate() const {
                Future<ctL> g1=iag.activate();
                return result->world.taskq.add(detail::wrap_mem_fn(*const_cast<this_type *> (this),
                        &this_type::forward_ctor),fimpl,result,g1,dim);
            }

            /// taskq-compatible ctor
            this_type forward_ctor(const implT* fimpl1, implL1* result1, const ctL& iag1, const int dim1) {
                return this_type(fimpl1,result1,iag1,dim1);
            }

            /// serialize the op; pointers travel as WorldObject ids
            template <typename Archive> void serialize(const Archive& ar) {
                ar & result & iag & fimpl & dim;
            }

        };


        /// project the low-dim function g on the hi-dim function f: this(x) = <f(x,y) | g(y)>

        /// invoked by result, a function of NDIM

        /// @param[in]  f   hi-dim function of LDIM+NDIM
        /// @param[in]  g   lo-dim function of LDIM
        /// @param[in]  dim over which dimensions to be integrated: 0..LDIM or LDIM..LDIM+NDIM-1
        template<size_t LDIM>
        void project_out2(const FunctionImpl<T,LDIM+NDIM>* f, const FunctionImpl<T,LDIM>* g, const int dim) {

            typedef std::pair< keyT,coeffT > pairT;
            typedef typename FunctionImpl<T,NDIM+LDIM>::dcT::const_iterator fiterator;

            // loop over all nodes of hi-dim f, compute the inner products with all
            // appropriate nodes of g, and accumulate in result
            fiterator end = f->get_coeffs().end();
            for (fiterator it=f->get_coeffs().begin(); it!=end; ++it) {
                const Key<LDIM+NDIM> key=it->first;
                // NOTE(review): node is taken by value (copy); a const reference
                // may have been intended -- confirm against upstream source
                const FunctionNode<T,LDIM+NDIM> fnode=it->second;
                const coeffT& fcoeff=fnode.coeff();

                // only leaf nodes with actual coefficients contribute
                if (fnode.is_leaf() and fcoeff.has_data()) {

                    // break key into particle: over key1 will be summed, over key2 will be
                    // accumulated, or vice versa, depending on dim
                    if (dim==0) {
                        // NOTE(review): key1/key2 types are swapped relative to the
                        // dim==1 branch and to do_project_out's (gkey, dest) signature;
                        // only consistent if LDIM==NDIM -- verify
                        Key<NDIM> key1;
                        Key<LDIM> key2;
                        key.break_apart(key1,key2);

                        Future<pairT> result;
                        // fetch the g coeffs from their owner (possibly remote) ...
                        // NOTE(review): task dispatched on g (an LDIM impl) with an
                        // implT (NDIM) member pointer and this->coeffs' owner map -- verify
                        //                      sock_it_to_me(key1, result.remote_ref(world));
                        g->task(coeffs.owner(key1), &implT::sock_it_to_me, key1, result.remote_ref(world), TaskAttributes::hipri());
                        // ... and contract locally once they arrive
                        woT::task(world.rank(),&implT:: template do_project_out<LDIM>,fcoeff,result,key1,key2,dim);

                    } else if (dim==1) {
                        Key<LDIM> key1;
                        Key<NDIM> key2;
                        key.break_apart(key1,key2);

                        Future<pairT> result;
                        //                      sock_it_to_me(key2, result.remote_ref(world));
                        g->task(coeffs.owner(key2), &implT::sock_it_to_me, key2, result.remote_ref(world), TaskAttributes::hipri());
                        woT::task(world.rank(),&implT:: template do_project_out<LDIM>,fcoeff,result,key2,key1,dim);

                    } else {
                        MADNESS_EXCEPTION("confused dim in project_out",1);
                    }
                }
            }
            // the result tree is built by accumulation -> mark it redundant/uncompressed
            this->compressed=false;
            this->nonstandard=false;
            this->redundant=true;
        }


        /// compute the inner product of two nodes of only some dimensions and accumulate on result

        /// invoked by result
        /// @param[in]  fcoeff  coefficients of high dimension LDIM+NDIM
        /// @param[in]  gpair   key and coeffs of low dimension LDIM (possibly a parent node)
        /// @param[in]  gkey    key of actual low dim node (possibly the same as gpair.first, iff gnode exists)
        /// @param[in]  dest    destination node for the result
        /// @param[in]  dim     which dimensions should be contracted: 0..LDIM-1 or LDIM..NDIM+LDIM-1
        template<size_t LDIM>
        void do_project_out(const coeffT& fcoeff, const std::pair<keyT,coeffT> gpair, const keyT& gkey,
                const Key<NDIM>& dest, const int dim) const {

            // map (possibly parent) g coeffs down to the actual g key
            const coeffT gcoeff=parent_to_child(gpair.second,gpair.first,gkey);

            // fast return if possible
            if (fcoeff.has_no_data() or gcoeff.has_no_data()) return;

            // let's specialize for the time being on SVD tensors for f and full tensors of half dim for g
            MADNESS_ASSERT(gcoeff.tensor_type()==TT_FULL);
            MADNESS_ASSERT(fcoeff.tensor_type()==TT_2D);
            const tensorT gtensor=gcoeff.full_tensor();
            tensorT result(cdata.vk);

            const int otherdim=(dim+1)%2;
            const int k=fcoeff.dim(0);
            // NOTE(review): reshape(k,k,k) below assumes LDIM==3 -- confirm
            std::vector<Slice> s(fcoeff.config().dim_per_vector()+1,_);

            // do the actual contraction: per SVD term, overlap the contracted-side
            // singular vector with g and add the weighted other-side vector
            for (int r=0; r<fcoeff.rank(); ++r) {
                s[0]=Slice(r,r);
                const tensorT contracted_tensor=fcoeff.config().ref_vector(dim)(s).reshape(k,k,k);
                const tensorT other_tensor=fcoeff.config().ref_vector(otherdim)(s).reshape(k,k,k);
                const double ovlp= gtensor.trace_conj(contracted_tensor);
                const double fac=ovlp * fcoeff.config().weights(r);
                result+=fac*other_tensor;
            }

            // accumulate the result into the destination node (task on dest's owner)
            coeffs.task(dest, &nodeT::accumulate2, result, coeffs, dest, TaskAttributes::hipri());
        }


        /// Returns the maximum local depth of the tree ... no communications.
        std::size_t max_local_depth() const;


        /// Returns the maximum depth of the tree ... collective ... global sum/broadcast
        std::size_t max_depth() const;

        /// Returns the max number of nodes on a processor
        std::size_t max_nodes() const;

        /// Returns the min number of nodes on a processor
        std::size_t min_nodes() const;

        /// Returns the size of the tree structure of the function ... collective global sum
        std::size_t tree_size() const;

        /// Returns the number of coefficients in the function ...
        /// Returns the number of coefficients in the function ... collective global sum
        std::size_t size() const;

        /// Returns the number of coefficients in the function ... collective global sum
        std::size_t real_size() const;

        /// print tree size and size
        void print_size(const std::string name) const;

        /// print the number of configurations per node
        void print_stats() const;

        /// In-place scale by a constant
        void scale_inplace(const T q, bool fence);

        /// Out-of-place scale by a constant

        /// result(this) = q * f; mirrors f's local tree structure node by node
        /// @param[in] q      scaling factor
        /// @param[in] f      source impl of the same dimension (value type F may differ)
        /// @param[in] fence  if true, globally fence after the local replacements
        template <typename Q, typename F>
        void scale_oop(const Q q, const FunctionImpl<F,NDIM>& f, bool fence) {
            typedef typename FunctionImpl<F,NDIM>::nodeT fnodeT;
            typedef typename FunctionImpl<F,NDIM>::dcT fdcT;
            typename fdcT::const_iterator end = f.coeffs.end();
            for (typename fdcT::const_iterator it=f.coeffs.begin(); it!=end; ++it) {
                const keyT& key = it->first;
                const fnodeT& node = it->second;

                if (node.has_coeff()) {
                    // scale the coefficients, keep the tree topology
                    coeffs.replace(key,nodeT(node.coeff()*q,node.has_children()));
                }
                else {
                    // node without coefficients: copy tree structure only
                    coeffs.replace(key,nodeT(coeffT(),node.has_children()));
                }
            }
            if (fence)
                world.gop.fence();
        }
    };

    namespace archive {
        /// deserialize a const FunctionImpl*: resolve the stored WorldObject id
        /// to the already-registered local instance (no ownership transferred)
        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveLoadImpl<Archive,const FunctionImpl<T,NDIM>*> {
            static void load(const Archive& ar, const FunctionImpl<T,NDIM>*& ptr) {
                bool exists=false;
                ar & exists;
                if (exists) {
                    uniqueidT id;
                    ar & id;
                    World* world = World::world_from_id(id.get_world_id());
                    MADNESS_ASSERT(world);
                    ptr = static_cast< const FunctionImpl<T,NDIM>*>(world->ptr_from_id< WorldObject< FunctionImpl<T,NDIM> > >(id));
                    if (!ptr)
                        MADNESS_EXCEPTION("FunctionImpl: remote operation attempting to use a locally uninitialized object",0);
                } else {
                    ptr=nullptr;
                }
            }
        };

        /// serialize a const FunctionImpl*: store an "exists" flag and, if set, its WorldObject id
        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveStoreImpl<Archive,const FunctionImpl<T,NDIM>*> {
            static void store(const Archive& ar, const FunctionImpl<T,NDIM>*const& ptr) {
                bool exists=(ptr) ? true : false;
                ar & exists;
                if (exists) ar & ptr->id();
            }
        };

        /// deserialize a mutable FunctionImpl*: same id-based resolution as the const variant
        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveLoadImpl<Archive, FunctionImpl<T,NDIM>*> {
            static void load(const Archive& ar, FunctionImpl<T,NDIM>*& ptr) {
                bool exists=false;
                ar & exists;
                if (exists) {
                    uniqueidT id;
                    ar & id;
                    World* world = World::world_from_id(id.get_world_id());
                    MADNESS_ASSERT(world);
                    ptr = static_cast< FunctionImpl<T,NDIM>*>(world->ptr_from_id< WorldObject< FunctionImpl<T,NDIM> > >(id));
                    if (!ptr)
                        MADNESS_EXCEPTION("FunctionImpl: remote operation attempting to use a locally uninitialized object",0);
                } else {
                    ptr=nullptr;
                }
            }
        };

        /// serialize a mutable FunctionImpl*: store an "exists" flag and, if set, its WorldObject id
        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveStoreImpl<Archive, FunctionImpl<T,NDIM>*> {
            static void store(const Archive& ar, FunctionImpl<T,NDIM>*const& ptr) {
                bool exists=(ptr) ? true : false;
                ar & exists;
                if (exists) ar & ptr->id();
            }
        };

        /// deserialize shared_ptr<const FunctionImpl>: wraps the resolved raw
        /// pointer with a no-op deleter, since the impl is owned elsewhere
        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveLoadImpl<Archive, std::shared_ptr<const FunctionImpl<T,NDIM> > > {
            static void load(const Archive& ar, std::shared_ptr<const FunctionImpl<T,NDIM> >& ptr) {
                const FunctionImpl<T,NDIM>* f = nullptr;
                ArchiveLoadImpl<Archive, const FunctionImpl<T,NDIM>*>::load(ar, f);
                ptr.reset(f, [] (const FunctionImpl<T,NDIM> *p_) -> void {});  // no-op deleter: non-owning
            }
        };

        /// serialize shared_ptr<const FunctionImpl>: delegates to the raw-pointer store
        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveStoreImpl<Archive, std::shared_ptr<const FunctionImpl<T,NDIM> > > {
            static void store(const Archive& ar, const std::shared_ptr<const FunctionImpl<T,NDIM> >& ptr) {
                ArchiveStoreImpl<Archive, const FunctionImpl<T,NDIM>*>::store(ar, ptr.get());
            }
        };

        /// deserialize shared_ptr<FunctionImpl>: same non-owning wrap as the const variant
        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveLoadImpl<Archive, std::shared_ptr<FunctionImpl<T,NDIM> > > {
            static void load(const Archive& ar, std::shared_ptr<FunctionImpl<T,NDIM> >& ptr) {
                FunctionImpl<T,NDIM>* f = nullptr;
                ArchiveLoadImpl<Archive, FunctionImpl<T,NDIM>*>::load(ar, f);
                ptr.reset(f, [] (FunctionImpl<T,NDIM> *p_) -> void {});  // no-op deleter: non-owning
            }
        };

        /// serialize shared_ptr<FunctionImpl>: delegates to the raw-pointer store
        template <class Archive, class T, std::size_t NDIM>
        struct ArchiveStoreImpl<Archive, std::shared_ptr<FunctionImpl<T,NDIM> > > {
            static void store(const Archive& ar, const std::shared_ptr<FunctionImpl<T,NDIM> >& ptr) {
                ArchiveStoreImpl<Archive, FunctionImpl<T,NDIM>*>::store(ar, ptr.get());
            }
        };
    }

}

#endif // MADNESS_MRA_FUNCIMPL_H__INCLUDED