1// /// Returns vector of keys of all local nodes where predicate is true 2// template <typename predicateT> 3// std::vector<keyT> keys (const predicateT& predicate) { 4// std::vector<keyT> v; 5// v.reserve(this->size()); 6// for (typename implT::iterator it = this->begin(); it != this->end(); ++it) { 7// if (predicate(*it)) v.push_back(it->first); 8// } 9// return v; 10// }; 11 12// /// Returns vector of keys of all local nodes 13// std::vector<keyT> keys () { 14// return keys(true_predicate<datumT>()); 15// }; 16 17 18// /// Returns vector of keys of local leaf nodes 19// std::vector<keyT> leaf_nodes () { 20// return keys(is_leaf_predicate()); 21// }; 22 23 24// /// Returns vector of keys of all local nodes with coeff 25// std::vector<keyT> coeff_nodes () { 26// return keys(has_coeff_predicate()); 27// }; 28 29 30 struct true_predicate { 31 bool operator()(const datumT& d) const { 32 return true; 33 }; 34 }; 35 36 37 struct is_leaf_predicate { 38 bool operator()(const datumT& d) const { 39 return d.second.is_leaf(); 40 }; 41 }; 42 43 struct is_not_leaf_predicate { 44 bool operator()(const datumT& d) const { 45 return !d.second.is_leaf(); 46 }; 47 }; 48 49 struct has_coeff_predicate { 50 bool operator()(const datumT& d) const { 51 return d.second.has_coeff(); 52 }; 53 }; 54 55 struct has_no_coeff_predicate { 56 bool operator()(const datumT& d) const { 57 return !d.second.has_coeff(); 58 }; 59 }; 60 61 62 63// void insert_weird_tree(const keyT& key) { 64// // for (Level i = 0; i < key.level(); i++) cout << " "; 65// // print(key); 66// Level Nmax = 1; 67// if (is_local(key)) { 68// bool has_children = ((key.level() < Nmax) || ((key.level() < 2*Nmax)&& 69// (key.translation()[0] == key.translation()[NDIM-1]))); 70// FunctionNode<T,NDIM> node(tensorT(0), has_children); 71// this->insert(key,node); 72// if (has_children) { 73// for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 74// insert_weird_tree(kit.key()); 75// } 76// } 77// } 78// else if (key.level() < Nmax) { 79// for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 80// insert_weird_tree(kit.key()); 81// } 82// } 83// }; 84 85 86 /// Optimized filter (inplace, contiguous, no err checking) 87 88 /// Transforms coefficients in s returning result also in s. 89 /// Uses work2 from common data to eliminate temporary creation and 90 /// to increase cache locality. 91 /// 92 /// No communication involved. 93// inline void filter_inplace(tensorT& s) { 94// transform_inplace(s, cdata.hgT, cdata.work2); 95// }; 96 97 98 /// Optimized unfilter (see info about filter_inplace) 99 100 /// No communication involved. 101// inline void unfilter_inplace(tensorT& s) { 102// transform_inplace(s, cdata.hg, cdata.work2); 103// }; 104//#define WORLD_INSTANTIATE_STATIC_TEMPLATES 105#include <madness/mra/loadbal.h> 106 107using namespace std; 108 109namespace madness { 110 111 typedef int Cost; 112 typedef double CompCost; 113 114 template <typename T, int D> 115 vector<typename DClass<D>::TreeCoords> LoadBalImpl<T,D>::findBestPartition() { 116 vector<typename DClass<D>::TreeCoords> klist; 117 if (this->f.impl->world.mpi.rank() != 0) { 118 print("findBestPartition: leave it to the expert"); 119 this->f.impl->world.gop.fence(); 120 print("about to do broadcast"); 121 unsigned int ksize; 122 this->f.impl->world.gop.template broadcast<unsigned int>(ksize); 123 for (unsigned int i = 0; i < ksize; i++) { 124 typename DClass<D>::TreeCoords t; 125 this->f.impl->world.gop.template broadcast<typename DClass<D>::TreeCoords>(t); 126 klist.push_back(t); 127 } 128 print("done with broadcast"); 129 return klist; 130 } 131 unsigned int npieces = this->f.impl->world.nproc(); 132 bool notdone = true; 133 int count = 0; 134 vector<vector<typename DClass<D>::TreeCoords> > listoflist; 135 vector<typename DClass<D>::TreeCoords> emptylist; 136 vector<Cost> costlist; 137 138 listoflist.push_back(emptylist); 139 costlist.push_back(0); 140 Cost totalCost = 0; 141 142<<<<<<< .mine 143 madness::print("findBestPartition: about to fixCost"); 144======= 145//madness::print("findBestPartition: about to fixCost"); 146>>>>>>> .r223 147 148<<<<<<< .mine 149 typename DClass<D>::KeyD root(0); 150 this->skeltree->template fixCost(root); 151 madness::print("findBestPartition: about to depthFirstPartition"); 152 // this->skeltree->print(root); 153 totalCost = this->skeltree->template depthFirstPartition(root, &listoflist[count], npieces, 154 totalCost, &costlist[count]); 155 //madness::print("findBestPartition: after depthFirstPartition"); 156 int size = listoflist[count].size(); 157 cout << "Partitioned tree " << count << ":" << endl; 158 for (int i = 0; i < size; i++) 159 listoflist[count][i].print(); 160 cout << "Max cost for this tree = " << costlist[count] << endl; 161 cout << endl; 162 if (listoflist[count].size() < npieces) 163 notdone = false; 164 count++; 165======= 166 typename DClass<D>::KeyD root(0); 167 this->skeltree->fixCost(root); 168//madness::print("findBestPartition: about to depthFirstPartition"); 169// this->skeltree->print(root); 170 totalCost = this->skeltree->depthFirstPartition(root, &listoflist[count], npieces, 171 totalCost, &costlist[count]); 172//madness::print("findBestPartition: after depthFirstPartition"); 173 int size = listoflist[count].size(); 174 cout << "Partitioned tree " << count << ":" << endl; 175 for (int i = 0; i < size; i++) 176 listoflist[count][i].print(); 177 cout << "Max cost for this tree = " << costlist[count] << endl; 178 cout << endl; 179 if (listoflist[count].size() < npieces) 180 notdone = false; 181 count++; 182>>>>>>> .r223 183 184<<<<<<< .mine 185 while (notdone) { 186 // this->skeltree.fixCost<D>(root); 187 this->skeltree->template fixCost(root); 188 // this->skeltree.rollup<D>(root); 189 this->skeltree->template rollup(root); 190 listoflist.push_back(emptylist); 191 costlist.push_back(0); 192 this->skeltree->template depthFirstPartition(root, &listoflist[count], npieces, totalCost, &costlist[count]); 193 int size = listoflist[count].size(); 194 cout << "Partitioned tree " << count << ":" << endl; 195 for (int i = 0; i < size; i++) 196 listoflist[count][i].print(); 197 cout << "Max cost for this tree = " << costlist[count] << endl; 198 cout << endl; 199======= 200 while (notdone) { 201 this->skeltree->fixCost(root); 202 this->skeltree->rollup(root); 203 listoflist.push_back(emptylist); 204 costlist.push_back(0); 205 this->skeltree->depthFirstPartition(root, &listoflist[count], npieces, totalCost, &costlist[count]); 206 int size = listoflist[count].size(); 207 cout << "Partitioned tree " << count << ":" << endl; 208 for (int i = 0; i < size; i++) 209 listoflist[count][i].print(); 210 cout << "Max cost for this tree = " << costlist[count] << endl; 211 cout << endl; 212 213 typename DClass<D>::treeT::iterator it = this->skeltree->find(root); 214 if (it == this->skeltree->end()) return klist; 215 typename DClass<D>::NodeD node = it->second; 216 if (!(node.has_children()) || (listoflist[count].size() < npieces)) { 217 notdone = false; 218 } 219 if (listoflist[count].size() < npieces) { 220 listoflist.erase(listoflist.begin()+count); 221 break; 222 } 223 count++; 224 } 225 unsigned int shortestList = 0, SL_index, LB_index; 226 Cost loadBalCost = 0; 227 vector<unsigned int> len; 228 for (int i = 0; i < count; i++) { 229 len.push_back(listoflist[i].size()); 230 if ((len[i] < shortestList) || (shortestList == 0)) { 231 shortestList = len[i]; 232 SL_index = i; 233 } 234 else if ((len[i] == shortestList) && (costlist[i] < costlist[SL_index])) { 235 // all things being equal, prefer better balance 236 shortestList = len[i]; 237 SL_index = i; 238 } 239 if ((costlist[i] < loadBalCost) || (loadBalCost == 0)) { 240 loadBalCost = costlist[i]; 241 LB_index = i; 242 } 243 else if ((costlist[i] == loadBalCost) && (len[i] < listoflist[LB_index].size())) { 244 // all things being equal, prefer fewer cuts 245 loadBalCost = costlist[i]; 246 LB_index = i; 247 } 248 } 249>>>>>>> .r223 250 251 // typename DClass<D>::treeT::iterator it = this->skeltree.find(root); 252 typename DClass<D>::treeT::iterator it = this->skeltree->find(root); 253 // if (it == this->skeltree.end()) return klist; 254 if (it == this->skeltree->end()) return klist; 255 typename DClass<D>::NodeD node = it->second; 256 if (!(node.has_children()) || (listoflist[count].size() < npieces)) { 257 notdone = false; 258 } 259 if (listoflist[count].size() < npieces) { 260 listoflist.erase(listoflist.begin()+count); 261 break; 262 } 263 count++; 264 } 265 unsigned int shortestList = 0, SL_index, LB_index; 266 Cost loadBalCost = 0; 267 vector<unsigned int> len; 268 for (int i = 0; i < count; i++) { 269 len.push_back(listoflist[i].size()); 270 if ((len[i] < shortestList) || (shortestList == 0)) { 271 shortestList = len[i]; 272 SL_index = i; 273 } else if ((len[i] == shortestList) && (costlist[i] < costlist[SL_index])) { 274 // all things being equal, prefer better balance 275 shortestList = len[i]; 276 SL_index = i; 277 } 278 if ((costlist[i] < loadBalCost) || (loadBalCost == 0)) { 279 loadBalCost = costlist[i]; 280 LB_index = i; 281 } else if ((costlist[i] == loadBalCost) && (len[i] < listoflist[LB_index].size())) { 282 // all things being equal, prefer fewer cuts 283 loadBalCost = costlist[i]; 284 LB_index = i; 285 } 286 } 287 288 cout << "The load balance with the fewest broken links has cost " << costlist[SL_index] << 289 ", and " << shortestList-1 << " broken links" << endl; 290 for (unsigned int i = 0; i < shortestList; i++) { 291 listoflist[SL_index][i].print(); 292 } 293 cout << endl; 294 cout << "The load balance with the best balance has cost " << loadBalCost << ", and " << 295 listoflist[LB_index].size()-1 << " broken links" << endl; 296 for (unsigned int i = 0; i < listoflist[LB_index].size(); i++) { 297 listoflist[LB_index][i].print(); 298 } 299 cout << endl; 300 301 CompCost ccleast = 0; 302 int cc_index; 303 for (int i = 0; i < count; i++) { 304 CompCost cctmp = computeCompCost(costlist[i], len[i]-1); 305 if ((i==0) || (cctmp < ccleast)) { 306 ccleast = cctmp; 307 cc_index = i; 308 } 309 } 310 cout << "The load balance with the best overall computational cost has cost " << 311 costlist[cc_index] << " and " << len[cc_index]-1 << " broken links" << endl; 312 for (unsigned int i = 0; i < len[cc_index]; i++) { 313 listoflist[cc_index][i].print(); 314 } 315 for (unsigned int i = 0; i < len[cc_index]; i++) { 316 klist.push_back(listoflist[cc_index][i]); 317 } 318 319 print("findBestPartition: about to do fence"); 320 this->f.impl->world.gop.fence(); 321 print("about to do broadcast"); 322 unsigned int ksize = klist.size(); 323 this->f.impl->world.gop.template broadcast<unsigned int>(ksize); 324 for (unsigned int i=0; i < ksize; i++) { 325 this->f.impl->world.gop.template broadcast<typename DClass<D>::TreeCoords>(klist[i]); 326 } 327 print("done with broadcast"); 328 329 return klist; 330 } 331 332<<<<<<< .mine 333======= 334template <int D, typename Pmap> 335Cost LBTree<D,Pmap>::fixCost(typename DClass<D>::KeyDConst& key) { 336// madness::print("fixCost: key =", key, " is about to be looked for"); 337 typename DClass<D>::treeT::iterator it = this->find(key); 338// madness::print("fixCost: key =", key, " was found (looked for),", (it == this->end())); 339 if (it == this->end()) return 0; 340// madness::print("fixCost: tree it was found (exists)"); 341>>>>>>> .r223 342 343<<<<<<< .mine 344 template <int D> 345 Cost LBTree<D>::fixCost(typename DClass<D>::KeyDConst& key) { 346 madness::print("fixCost: key =", key, " is about to be looked for"); 347 typename DClass<D>::treeT::iterator it = this->find(key); 348 madness::print("fixCost: key =", key, " was found (looked for),", (it == this->end())); 349 if (it == this->end()) return 0; 350 madness::print("fixCost: tree it was found (exists)"); 351 352 typename DClass<D>::NodeD node = it->second; 353 madness::print("fixCost: got node"); 354 NodeData d = node.get_data(); 355 madness::print("fixCost: got data from node"); 356 d.subcost = d.cost; 357 madness::print("fixCost: assigned node cost to subcost"); 358 if (node.has_children()) { 359 madness::print("fixCost: node has children"); 360 for (KeyChildIterator<D> kit(key); kit; ++kit) { 361 d.subcost += this->template fixCost(kit.key()); 362 } 363 } 364 node.set_data(d); 365 madness::print("fixCost: about to insert key =", key, ",", node.get_data()); 366 this->insert(key,node); 367 madness::print("fixCost: inserted node"); 368 return d.subcost; 369======= 370 typename DClass<D>::NodeD node = it->second; 371// madness::print("fixCost: got node"); 372 NodeData d = node.get_data(); 373// madness::print("fixCost: got data from node"); 374 d.subcost = d.cost; 375// madness::print("fixCost: assigned node cost to subcost"); 376 if (node.has_children()) 377 { 378// madness::print("fixCost: node has children"); 379 for (KeyChildIterator<D> kit(key); kit; ++kit) { 380 d.subcost += this->fixCost(kit.key()); 381 } 382>>>>>>> .r223 383 } 384<<<<<<< .mine 385======= 386 node.set_data(d); 387//madness::print("fixCost: about to insert key =", key, ",", node.get_data()); 388 this->insert(key,node); 389//madness::print("fixCost: inserted node"); 390 return d.subcost; 391} 392>>>>>>> .r223 393 394 395 template <int D> 396 Cost LBTree<D>::depthFirstPartition(typename DClass<D>::KeyDConst& key, 397 vector<typename DClass<D>::TreeCoords>* klist, unsigned int npieces, 398 Cost totalcost, Cost *maxcost) { 399//madness::print("depthFirstPartition: at very beginning"); 400<<<<<<< .mine 401 if (totalcost == 0) { 402 totalcost = this->template computeCost(key); 403 } 404 madness::print("depthFirstPartition: totalcost =", totalcost); 405======= 406 if (totalcost == 0) { 407 totalcost = this->computeCost(key); 408 } 409//madness::print("depthFirstPartition: totalcost =", totalcost); 410>>>>>>> .r223 411 412 Cost costLeft = totalcost; 413 int partsLeft = npieces; 414 *maxcost = 0; 415 Cost partitionSize = 0; 416 double facter = 1.1; 417 418<<<<<<< .mine 419 for (int i = npieces-1; i >= 0; i--) { 420 cout << endl << "Beginning partition number " << i << endl; 421 vector<typename DClass<D>::KeyD> tmplist; 422 Cost tpart = computePartitionSize(costLeft, partsLeft); 423 if (tpart > partitionSize) { 424 partitionSize = tpart; 425 } 426 madness::print("depthFirstPartition: partitionSize =", partitionSize); 427 Cost usedUp = 0; 428 bool atleaf = false; 429 usedUp = this->template makePartition(key, &tmplist, partitionSize, (i==0), usedUp, &atleaf); 430 if (*maxcost < usedUp) *maxcost = usedUp; 431 costLeft -= usedUp; 432 partsLeft--; 433 for (unsigned int j = 0; j < tmplist.size(); j++) { 434 klist->push_back(typename DClass<D>::TreeCoords(typename DClass<D>::KeyD(tmplist[j]), i)); 435 } 436 } 437 return totalcost; 438======= 439 for (int i = npieces-1; i >= 0; i--) { 440 cout << endl << "Beginning partition number " << i << endl; 441 vector<typename DClass<D>::KeyD> tmplist; 442 Cost tpart = computePartitionSize(costLeft, partsLeft); 443 if ((tpart > partitionSize) || (tpart*facter < partitionSize)) { 444 partitionSize = tpart; 445 } 446//madness::print("depthFirstPartition: partitionSize =", partitionSize); 447 Cost usedUp = 0; 448 bool atleaf = false; 449 usedUp = this->makePartition(key, &tmplist, partitionSize, (i==0), usedUp, &atleaf); 450 if (*maxcost < usedUp) *maxcost = usedUp; 451 costLeft -= usedUp; 452 partsLeft--; 453 for (unsigned int j = 0; j < tmplist.size(); j++) { 454 klist->push_back(typename DClass<D>::TreeCoords(typename DClass<D>::KeyD(tmplist[j]), i)); 455 } 456>>>>>>> .r223 457 } 458 459 template <int D> 460 void LBTree<D>::rollup(typename DClass<D>::KeyDConst& key) { 461// madness::print("rollup: at beginning"); 462 typename DClass<D>::treeT::iterator it = this->find(key); 463 if (it == this->end()) return; 464 465// madness::print("rollup: about to get node associated with key",key); 466 typename DClass<D>::NodeD node = it->second; 467 if (!node.has_children()) { 468// madness::print("rollup: this node has no children; returning"); 469 return; // no rolling to be done here. 470 } 471// madness::print("rollup: this node has children"); 472 bool hasleafchild = false; 473 for (KeyChildIterator<D> kit(key); kit; ++kit) { 474 typename DClass<D>::treeT::iterator itc = this->find(kit.key()); 475 if (itc != this->end()) { 476// madness::print("rollup: found child", kit.key()); 477 typename DClass<D>::NodeD c = itc->second; 478 if (!c.has_children()) { 479// madness::print("rollup: child is leaf"); 480 hasleafchild = true; 481 break; 482 } else { 483// madness::print("rollup: child", kit.key(), "has children"); 484 } 485 } 486 } 487 if (hasleafchild) { 488// madness::print("rollup: about to meld with key",key); 489<<<<<<< .mine 490 this->template meld(key); 491 } 492 for (KeyChildIterator<D> kit(key); kit; ++kit) { 493 typename DClass<D>::treeT::iterator itc = this->find(kit.key()); 494 if (itc != this->end()) { 495======= 496 this->meld(key); 497 } 498 for (KeyChildIterator<D> kit(key); kit; ++kit) { 499 typename DClass<D>::treeT::iterator itc = this->find(kit.key()); 500 if (itc != this->end()) { 501>>>>>>> .r223 502// madness::print("rollup: found child", kit.key()); 503 typename DClass<D>::NodeD c = itc->second; 504 if (c.has_children()) { 505// madness::print("rollup: child", kit.key(), "has children"); 506<<<<<<< .mine 507 this->template rollup(kit.key()); 508 } 509 } 510 } 511 it = this->find(key); 512 node = it->second; 513 NodeData d = node.get_data(); 514 if (d.istaken) { 515 d.istaken = false; 516 node.set_data(d); 517 this->insert(key,node); 518 } 519======= 520 this->rollup(kit.key()); 521 } 522 } 523>>>>>>> .r223 524 } 525 526 template <int D> 527 void LBTree<D>::meld(typename DClass<D>::KeyDConst& key) { 528// madness::print("meld: at beginning, finding key", key); 529 Cost cheapest = 0; 530 typename DClass<D>::treeT::iterator it = this->find(key); 531 if (it == this->end()) return; 532 533 vector<unsigned int> mylist; 534 535 typename DClass<D>::NodeD node = it->second; 536 unsigned int i = 0; 537// madness::print("meld: about to iterate over children of key", key); 538 for (KeyChildIterator<D> kit(key); kit; ++kit) { 539// madness::print(" meld: iterating over child", i); 540 if (node.has_child(i)) { 541 typename DClass<D>::treeT::iterator itc = this->find(kit.key()); 542 if (itc == this->end()) return; 543 typename DClass<D>::NodeD c = itc->second; 544 if (!c.has_children()) { 545// madness::print(" meld: child",i,"has no children"); 546 Cost cost = c.get_data().cost; 547 if ((cost < cheapest) || (cheapest == 0)) { 548 cheapest = cost; 549 mylist.clear(); 550 mylist.push_back(i); 551 } else if (cost == cheapest) { 552 mylist.push_back(i); 553 } 554 } 555 } 556 i++; 557 } 558 559 if (cheapest == 0) { 560// madness::print("meld: this node has no leaf children"); 561 NodeData d = node.get_data(); 562 d.istaken = false; 563 node.set_data(d); 564 this->insert(key,node); 565 return; 566 } 567 568 NodeData d = node.get_data(); 569 570 i = 0; 571 int j = 0, mlsize = mylist.size(); 572 for (KeyChildIterator<D> kit(key); kit; ++kit) { 573 if (mylist[j] == i) { 574// madness::print("meld: found a match, mylist[",j,"] =",i); 575 this->erase(kit.key()); 576 node.set_child(mylist[j], false); 577 d.cost += cheapest; 578 j++; 579 } 580 i++; 581 if (j == mlsize) break; 582 } 583 d.istaken = false; 584 node.set_data(d); 585 this->insert(key,node); 586// madness::print("meld: inserted node back into tree; goodbye!"); 587 } 588 589 590 template <int D> 591 Cost LBTree<D>::computeCost(typename DClass<D>::KeyDConst& key) { 592 Cost cost = 0; 593 typename DClass<D>::treeT::iterator it = this->find(key); 594 if (it == this->end()) return cost; 595 596<<<<<<< .mine 597 typename DClass<D>::NodeD node = it->second; 598 for (KeyChildIterator<D> kit(key); kit; ++kit) { 599 cost += this->template computeCost(kit.key()); 600 } 601 NodeData d = node.get_data(); 602 cost += d.cost; 603 604 d.subcost = cost; 605 node.set_data(d); 606 this->insert(key,node); 607 return cost; 608======= 609 typename DClass<D>::NodeD node = it->second; 610 for (KeyChildIterator<D> kit(key); kit; ++kit) { 611 cost += this->computeCost(kit.key()); 612>>>>>>> .r223 613 } 614 615 616 template <int D> 617 Cost LBTree<D>::makePartition(typename DClass<D>::KeyDConst& key, 618 vector<typename DClass<D>::KeyD>* klist, Cost partitionSize, bool lastPartition, 619 Cost usedUp, bool *atleaf) { 620// madness::print("at beginning of makePartition: atleaf =", *atleaf); 621 double fudgeFactor = 0.1; 622 Cost maxAddl = (Cost) (fudgeFactor*partitionSize); 623 624 typename DClass<D>::treeT::iterator it = this->find(key); 625 if (it == this->end()) { 626 return usedUp; 627 } 628 629 typename DClass<D>::NodeD node = it->second; 630 NodeData d = node.get_data(); 631 632 it = this->end(); 633 634<<<<<<< .mine 635 madness::print("makePartition: data for key", key, ":", d); 636 madness::print("makePartition: partitionSize =", partitionSize, ", lastPartition =", lastPartition, ", usedUp =", usedUp); 637======= 638// madness::print("makePartition: data for key", key, ":", d); 639// madness::print("makePartition: partitionSize =", partitionSize, ", lastPartition =", lastPartition, ", usedUp =", usedUp); 640>>>>>>> .r223 641 642<<<<<<< .mine 643 if (d.istaken) { 644 madness::print("makePartition: this key is taken"); 645 return usedUp; 646 } 647======= 648 if (d.istaken) { 649// madness::print("makePartition: this key is taken"); 650 return usedUp; 651 } 652>>>>>>> .r223 653 654<<<<<<< .mine 655 madness::print("makePartition: back to key", key); 656======= 657// madness::print("makePartition: back to key", key); 658>>>>>>> .r223 659 660<<<<<<< .mine 661 // if either we're at the last partition, the partition is currently empty 662 // and this is a single item, or there is still room in the partition and 663 // adding this to it won't go above the fudge factor, 664 // then add this piece to the partition. 665 if ((lastPartition) || ((usedUp == 0) && (!node.has_children())) || 666 ((usedUp < partitionSize) && (d.subcost+usedUp <= partitionSize+maxAddl))) { 667 // add to partition 668 madness::print("makePartition: adding to partition", key); 669 klist->push_back(typename DClass<D>::KeyD(key)); 670 d.istaken = true; 671 usedUp += d.subcost; 672 // REMOVE COST FROM FOREPARENTS (implement this) 673 this->template removeCost(key.parent(), d.subcost); 674 node.set_data(d); 675 this->insert(key,node); 676 } else if (usedUp < partitionSize) { 677 // try this node's children (if any) 678 if (node.has_children()) { 679 int i = 0; 680 for (KeyChildIterator<D> kit(key); kit; ++kit) { 681 if (node.has_child(i)) { 682 madness::print("makePartition:", key, "recursively calling", kit.key()); 683 usedUp = this->template makePartition(kit.key(), klist, partitionSize, lastPartition, 684 usedUp, atleaf); 685 if ((*atleaf) || (usedUp >= partitionSize)) { 686 break; 687 } 688 } 689 i++; 690 } 691 } else { 692 madness::print("makePartition: about to set atleaf = true"); 693 *atleaf = true; 694 } 695 } 696 return usedUp; 697======= 698 // if either we're at the last partition, the partition is currently empty 699 // and this is a single item, or there is still room in the partition and 700 // adding this to it won't go above the fudge factor, 701 // then add this piece to the partition. 702 if ((lastPartition) || ((usedUp == 0) && (!node.has_children())) || 703 ((usedUp < partitionSize) && (d.subcost+usedUp <= partitionSize+maxAddl))) { 704 // add to partition 705// madness::print("makePartition: adding to partition", key); 706 klist->push_back(typename DClass<D>::KeyD(key)); 707 d.istaken = true; 708 usedUp += d.subcost; 709 // REMOVE COST FROM FOREPARENTS (implement this) 710 this->removeCost(key.parent(), d.subcost); 711 node.set_data(d); 712 this->insert(key,node); 713>>>>>>> .r223 714 } 715<<<<<<< .mine 716======= 717 else if (usedUp < partitionSize) { 718 // try this node's children (if any) 719 if (node.has_children()) { 720 int i = 0; 721 for (KeyChildIterator<D> kit(key); kit; ++kit) { 722 if (node.has_child(i)) { 723// madness::print("makePartition:", key, "recursively calling", kit.key()); 724 usedUp = this->makePartition(kit.key(), klist, partitionSize, lastPartition, 725 usedUp, atleaf); 726 if ((*atleaf) || (usedUp >= partitionSize)) { 727 break; 728 } 729 } 730 i++; 731 } 732 } 733 else { 734// madness::print("makePartition: about to set atleaf = true"); 735 *atleaf = true; 736 } 737 } 738 return usedUp; 739} 740>>>>>>> .r223 741 742<<<<<<< .mine 743 template <int D> 744 void LBTree<D>::removeCost(typename DClass<D>::KeyDConst& key, Cost c) { 745 madness::print("removeCost: key", key, "owner =", owner(key)); 746 this->get_mypmap().print(); 747 if (((int) key.level()) < 0) return; 748 typename DClass<D>::treeT::iterator it = this->find(key); 749 madness::print("removeCost: found key"); 750 if (it == this->end()) return; 751 typename DClass<D>::NodeD node = it->second; 752 NodeData d = node.get_data(); 753 madness::print("removeCost: got data"); 754 d.subcost -= c; 755 if (key.level() > 0) { 756 this->template removeCost(key.parent(), c); 757 } 758 madness::print("removeCost: before setting, data =", d); 759 node.set_data(d); 760 madness::print("removeCost: after setting, data =", node.get_data()); 761 this->insert(key,node); 762 madness::print("removeCost: after inserting, data = ", node.get_data()); 763======= 764template <int D, typename Pmap> 765void LBTree<D,Pmap>::removeCost(typename DClass<D>::KeyDConst& key, Cost c) { 766//madness::print("removeCost: key", key, "owner =", owner(key)); 767//this->get_procmap().print(); 768 if (((int) key.level()) < 0) return; 769 typename DClass<D>::treeT::iterator it = this->find(key); 770//madness::print("removeCost: found key"); 771 if (it == this->end()) return; 772 typename DClass<D>::NodeD node = it->second; 773 NodeData d = node.get_data(); 774//madness::print("removeCost: got data"); 775 d.subcost -= c; 776 if (key.level() > 0) { 777 this->removeCost(key.parent(), c); 778>>>>>>> .r223 779 } 780<<<<<<< .mine 781======= 782//madness::print("removeCost: before setting, data =", d); 783 node.set_data(d); 784//madness::print("removeCost: after setting, data =", node.get_data()); 785 this->insert(key,node); 786//madness::print("removeCost: after inserting, data = ", node.get_data()); 787} 788>>>>>>> .r223 789 790 791 Cost computePartitionSize(Cost cost, unsigned int parts) { 792 return (Cost) ceil(((double) cost)/((double) parts)); 793 } 794 795 796 CompCost computeCompCost(Cost c, int n) { 797 CompCost compcost; 798 CompCost cfactor = 0.1, nfactor = 1.0; 799 compcost = cfactor*c + nfactor*n; 800 return compcost; 801 } 802 803 804 template <typename T, int D> 805 void migrate_data(SharedPtr<FunctionImpl<T,D> > tfrom, SharedPtr<FunctionImpl<T,D> > tto, 806 typename DClass<D>::KeyD key) { 807 typename FunctionImpl<T,D>::iterator it = tfrom->find(key); 808 if (it == tfrom->end()) return; 809 810 FunctionNode<T,D> node = it->second; 811 812 if (node.has_children()) { 813 for (KeyChildIterator<D> kit(key); kit; ++kit) { 814 migrate_data<T,D>(tfrom, tto, kit.key()); 815 } 816 } 817 tto->insert(key, node); 818 } 819 820 821<<<<<<< .mine 822 template <typename T, int D> 823 void migrate(SharedPtr<FunctionImpl<T,D> > tfrom, SharedPtr<FunctionImpl<T,D> > tto) { 824 typename DClass<D>::KeyD root(0); 825 print("migrate: at beginning"); 826 migrate_data<T,D>(tfrom, tto, root); 827 print("migrate: at end"); 828 } 829======= 830template <typename T, int D, typename Pmap> 831void migrate(SharedPtr<FunctionImpl<T,D,Pmap> > tfrom, SharedPtr<FunctionImpl<T,D,Pmap> > tto) { 832 typename DClass<D>::KeyD root(0); 833//print("migrate: at beginning"); 834 migrate_data<T,D,Pmap>(tfrom, tto, root); 835//print("migrate: at end"); 836} 837>>>>>>> .r223 838 839<<<<<<< .mine 840 // Explicit instantiations for D=1:6 841 template void migrate<double,3,MyPmap<3> >(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom, 842 SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto); 843======= 844// Explicit instantiations for D=1:6 845>>>>>>> .r223 846 847<<<<<<< .mine 848 template void migrate_data<double,3>(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom, 849 SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto, DClass<3>::KeyD key); 850======= 851>>>>>>> .r223 852 853<<<<<<< .mine 854 855// Who knows why these aren't cooperating, so commented out for now 856 /* 857 template void migrate_data<double,1>(SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tfrom, 858 SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tto, DClass<1>::KeyD key); 859 template void migrate_data<double,2>(SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tfrom, 860 SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tto, DClass<2>::KeyD key); 861 template void migrate_data<double,3>(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom, 862 SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto, DClass<3>::KeyD key); 863 template void migrate_data<double,4>(SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tfrom, 864 SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tto, DClass<4>::KeyD key); 865 template void migrate_data<double,5>(SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tfrom, 866 SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tto, DClass<5>::KeyD key); 867 template void migrate_data<double,6>(SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tfrom, 868 SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tto, DClass<6>::KeyD key); 869 870 template void migrate_data<std::complex<double>,1>(SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tfrom, 871 SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tto, DClass<1>::KeyD key); 872 template void migrate_data<std::complex<double>,2>(SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tfrom, 873 SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tto, DClass<2>::KeyD key); 874 template void migrate_data<std::complex<double>,3>(SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tfrom, 875 SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tto, DClass<3>::KeyD key); 876 template void migrate_data<std::complex<double>,4>(SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tfrom, 877 SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tto, DClass<4>::KeyD key); 878 template void migrate_data<std::complex<double>,5>(SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tfrom, 879 SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tto, DClass<5>::KeyD key); 880 template void migrate_data<std::complex<double>,6>(SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tfrom, 881 SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tto, DClass<6>::KeyD key); 882 883 template void migrate<double,1,MyPmap<1> >(SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tfrom, 884 SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tto); 885 template void migrate<double,2,MyPmap<2> >(SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tfrom, 886 SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tto); 887 template void migrate<double,3,MyPmap<3> >(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom, 888 SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto); 889 template void migrate<double,4,MyPmap<4> >(SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tfrom, 890 SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tto); 891 template void migrate<double,5,MyPmap<5> >(SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tfrom, 892 SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tto); 893 template void migrate<double,6,MyPmap<6> >(SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tfrom, 894 SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tto); 895 896 template void migrate<std::complex<double>,1,MyPmap<1> >(SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > tfrom, 897 SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tto); 898 template void migrate<std::complex<double>,2,MyPmap<2> >(SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tfrom, 899 SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tto); 900 template void migrate<std::complex<double>,3,MyPmap<3> >(SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tfrom, 901 SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tto); 902 template void migrate<std::complex<double>,4,MyPmap<4> >(SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tfrom, 903 SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tto); 904 template void migrate<std::complex<double>,5,MyPmap<5> >(SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tfrom, 905 SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tto); 906 template void migrate<std::complex<double>,6,MyPmap<6> >(SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tfrom, 907 SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tto); 908 */ 909======= 910template void migrate_data<double,1>(SharedPtr<FunctionImpl<double,1,MyProcmap<1> > > tfrom, 911 SharedPtr<FunctionImpl<double,1,MyProcmap<1> > > tto, DClass<1>::KeyD key); 912template void migrate_data<double,2>(SharedPtr<FunctionImpl<double,2,MyProcmap<2> > > tfrom, 913 SharedPtr<FunctionImpl<double,2,MyProcmap<2> > > tto, DClass<2>::KeyD key); 914template void migrate_data<double,3>(SharedPtr<FunctionImpl<double,3,MyProcmap<3> > > tfrom, 915 SharedPtr<FunctionImpl<double,3,MyProcmap<3> > > tto, DClass<3>::KeyD key); 916template void migrate_data<double,4>(SharedPtr<FunctionImpl<double,4,MyProcmap<4> > > tfrom, 917 SharedPtr<FunctionImpl<double,4,MyProcmap<4> > > tto, DClass<4>::KeyD key); 918template void migrate_data<double,5>(SharedPtr<FunctionImpl<double,5,MyProcmap<5> > > tfrom, 919 SharedPtr<FunctionImpl<double,5,MyProcmap<5> > > tto, DClass<5>::KeyD key); 920template void migrate_data<double,6>(SharedPtr<FunctionImpl<double,6,MyProcmap<6> > > tfrom, 921 SharedPtr<FunctionImpl<double,6,MyProcmap<6> > > tto, DClass<6>::KeyD key); 922>>>>>>> .r223 923 924 template class LoadBalImpl<double,1,MyPmap<1> >; 925 template class LoadBalImpl<double,2,MyPmap<2> >; 926 template class LoadBalImpl<double,3,MyPmap<3> >; 927 template class LoadBalImpl<double,4,MyPmap<4> >; 928 template class LoadBalImpl<double,5,MyPmap<5> >; 929 template class LoadBalImpl<double,6,MyPmap<6> >; 930 931 template class LoadBalImpl<std::complex<double>,1,MyPmap<1> >; 932 template class LoadBalImpl<std::complex<double>,2,MyPmap<2> >; 933 template class LoadBalImpl<std::complex<double>,3,MyPmap<3> >; 934 template class LoadBalImpl<std::complex<double>,4,MyPmap<4> >; 935 template class LoadBalImpl<std::complex<double>,5,MyPmap<5> >; 936 template class LoadBalImpl<std::complex<double>,6,MyPmap<6> >; 937 938<<<<<<< .mine 939 template class LBTree<1,MyPmap<1> >; 940 template class LBTree<2,MyPmap<2> >; 941 template class LBTree<3,MyPmap<3> >; 942 template class LBTree<4,MyPmap<4> >; 943 template class LBTree<5,MyPmap<5> >; 944 template class LBTree<6,MyPmap<6> >; 945======= 946// Who knows why this isn't cooperating, so commented out for now 947/* 948template void migrate<std::complex<double>,1,MyProcmap<1> >(SharedPtr<FunctionImpl<std::complex<double>,1,MyProcmap<1> > tfrom, 949 SharedPtr<FunctionImpl<std::complex<double>,1,MyProcmap<1> > > tto); 950*/ 951template void migrate<std::complex<double>,2,MyProcmap<2> >(SharedPtr<FunctionImpl<std::complex<double>,2,MyProcmap<2> > > tfrom, 952 SharedPtr<FunctionImpl<std::complex<double>,2,MyProcmap<2> > > tto); 953template void migrate<std::complex<double>,3,MyProcmap<3> >(SharedPtr<FunctionImpl<std::complex<double>,3,MyProcmap<3> > > tfrom, 954 SharedPtr<FunctionImpl<std::complex<double>,3,MyProcmap<3> > > tto); 955template void migrate<std::complex<double>,4,MyProcmap<4> >(SharedPtr<FunctionImpl<std::complex<double>,4,MyProcmap<4> > > tfrom, 956 SharedPtr<FunctionImpl<std::complex<double>,4,MyProcmap<4> > > tto); 957template void migrate<std::complex<double>,5,MyProcmap<5> >(SharedPtr<FunctionImpl<std::complex<double>,5,MyProcmap<5> > > tfrom, 958 SharedPtr<FunctionImpl<std::complex<double>,5,MyProcmap<5> > > tto); 959template void migrate<std::complex<double>,6,MyProcmap<6> >(SharedPtr<FunctionImpl<std::complex<double>,6,MyProcmap<6> > > tfrom, 960 SharedPtr<FunctionImpl<std::complex<double>,6,MyProcmap<6> > > tto); 961 962 963template class LoadBalImpl<double,1,MyProcmap<1> >; 964template class LoadBalImpl<double,2,MyProcmap<2> >; 965template class LoadBalImpl<double,3,MyProcmap<3> >; 966template class LoadBalImpl<double,4,MyProcmap<4> >; 967template class LoadBalImpl<double,5,MyProcmap<5> >; 968template class LoadBalImpl<double,6,MyProcmap<6> >; 969 970template class LoadBalImpl<std::complex<double>,1,MyProcmap<1> >; 971template class LoadBalImpl<std::complex<double>,2,MyProcmap<2> >; 972template class LoadBalImpl<std::complex<double>,3,MyProcmap<3> >; 973template class LoadBalImpl<std::complex<double>,4,MyProcmap<4> >; 974template class LoadBalImpl<std::complex<double>,5,MyProcmap<5> >; 975template class LoadBalImpl<std::complex<double>,6,MyProcmap<6> >; 976 977template class LBTree<1,MyProcmap<1> >; 978template class LBTree<2,MyProcmap<2> >; 979template class LBTree<3,MyProcmap<3> >; 980template class LBTree<4,MyProcmap<4> >; 981template class LBTree<5,MyProcmap<5> >; 982template class LBTree<6,MyProcmap<6> >; 983>>>>>>> .r223 984} 985//#define WORLD_INSTANTIATE_STATIC_TEMPLATES 986#ifndef LOADBAL_H 987#define LOADBAL_H 988 989#include <madness/world/MADworld.h> 990#include <madness/mra/key.h> 991#include <madness/mra/mra.h> 992using namespace std; 993 994namespace madness { 995 996 typedef int Cost; 997 typedef double CompCost; 998 999 inline int nearest_power(int me, int d) { 1000 int k = 0; 1001 while (me != 0) { 1002 if (me%d == 0) { 1003 k++; 1004 me/=d; 1005 } else { 1006 break; 1007 } 1008 } 1009 return k; 1010 }; 1011 1012 template <typename Data, int D> class LBNode; 1013 template <int D> struct TreeCoords; 1014 template <int D> struct Tree; 1015 template <int D> class MyPmap; 1016 template <int D> class LBTree; 1017 class NodeData; 1018 1019 template <int D> 1020 struct DClass { 1021 typedef Key<D> KeyD; 1022 typedef const Key<D> KeyDConst; 1023 typedef TreeCoords<D> TreeCoords; 1024 typedef Tree<D> Tree; 1025 typedef LBNode<NodeData,D> NodeD; 1026 typedef const LBNode<NodeData,D> NodeDConst; 1027 typedef MyPmap<D> MyPmap; 1028 typedef LBTree<D> treeT; 1029 }; 1030 1031 template <typename T, int D> 1032 void migrate(SharedPtr<FunctionImpl<T,D> > tfrom, SharedPtr<FunctionImpl<T,D> > tto); 1033 1034<<<<<<< .mine 1035 template <typename T, int D> 1036 void migrate_data(SharedPtr<FunctionImpl<T,D> > tfrom, SharedPtr<FunctionImpl<T,D> > tto, 1037 typename DClass<D>::KeyD key); 1038======= 1039template <int D> 1040struct DClass { 1041 typedef Key<D> KeyD; 1042 typedef const Key<D> KeyDConst; 1043 typedef TreeCoords<D> TreeCoords; 1044 typedef Tree<D> Tree; 1045 typedef LBNode<NodeData,D> NodeD; 1046 typedef const LBNode<NodeData,D> NodeDConst; 1047 typedef MyProcmap<D> MyProcMap; 1048 typedef LBTree<D,MyProcMap> treeT; 1049}; 1050>>>>>>> .r223 1051 1052 template <typename Data, int D> 1053 class LBNode { 1054 private: 1055 Data data; 1056 std::vector<bool> c; 1057 1058 void allchildren(bool status=false) { 1059 c.clear(); 1060 c.assign(dim, status); 1061 }; 1062 1063 public: 1064 static int dim; 1065 1066 LBNode() { 1067 data = Data(); 1068 allchildren(); 1069 }; 1070 1071 LBNode(Data d, bool children=false) : data(d) { 1072 allchildren(children); 1073 }; 1074 1075 bool has_children() const { 1076 for (int i = 0; i < dim; i++) 1077 if (c[i]) return true; 1078 return false; 1079 }; 1080 1081 bool has_child(unsigned int i) const { 1082 return c[i]; 1083 }; 1084 1085 bool has_child(int i) const { 1086 return c[i]; 1087 }; 1088 1089 void set_child(int i, bool setto = true) { 1090 c[i] = setto; 1091 }; 1092 1093 void set_data(Data d) { 1094 data = d; 1095 }; 1096 1097 Data get_data() const { 1098 return data; 1099 }; 1100 1101 vector<bool> get_c() const { 1102 return c; 1103 }; 1104 1105 template <typename Archive> 1106 void serialize(const Archive& ar) { 1107 ar & data & c; 1108 } 1109 }; 1110 1111 1112 template <typename Data, int D> 1113 std::ostream& operator<<(std::ostream& s, const LBNode<Data, D>& node) { 1114 s << "data = " << node.get_data() << ", c = " << node.get_c(); 1115 return s; 1116 }; 1117 1118 template <int D> 1119 std::ostream& operator<<(std::ostream& s, typename DClass<D>::NodeDConst& node) { 1120 s << "data = " << node.get_data() << ", c = " << node.get_c(); 1121 return s; 1122 }; 1123 1124 1125 template <typename Data, int D> 1126 int LBNode<Data,D>::dim = power<D>(); 1127 1128 1129 class NodeData { 1130 friend std::ostream& operator<<(std::ostream& s, const NodeData& nd); 1131 public: 1132 int cost; 1133 int subcost; 1134 bool istaken; 1135 NodeData(int c = 1, int s = 1, bool i = false) : cost(c), subcost(s), istaken(i) {}; 1136 template <typename Archive> 1137 void serialize(const Archive& ar) { 1138 ar & cost & subcost & istaken; 1139 }; 1140 void print() { 1141 cout << "cost = " << cost << ", subcost = " << subcost << ", istaken = " << istaken << endl; 1142 }; 1143 }; 1144 1145 1146 inline std::ostream& operator<<(std::ostream& s, const NodeData& nd) { 1147 s << "cost " << nd.cost << ", subcost " << nd.subcost << ", istaken " << nd.istaken; 1148 return s; 1149 }; 1150 1151 1152 1153 template <int D> 1154 struct TreeCoords { 1155 Key<D> key; 1156 ProcessID owner; 1157 1158 TreeCoords(const Key<D> k, ProcessID o) : key(Key<D>(k)), owner(o) {}; 1159 TreeCoords(const TreeCoords& t) : key(Key<D>(t.key)), owner(t.owner) {}; 1160 TreeCoords() : key(Key<D>()), owner(-1) {}; 1161 void print() const { 1162 madness::print(key, " owner =", owner); 1163 }; 1164 1165 bool operator< (const TreeCoords t) const { 1166 return (this->key < t.key); 1167 }; 1168 }; 1169 1170 1171 1172 template <int D> 1173 struct Tree { 1174 TreeCoords<D> data; 1175 vector<SharedPtr<Tree> > children; 1176 Tree* parent; 1177 1178 Tree() {}; 1179 Tree(TreeCoords<D> d) : data(d), parent(0) {}; 1180 Tree(TreeCoords<D> d, Tree* p) : data(d), parent(p) {}; 1181 1182 Tree(const Tree<D>& tree) : data(tree.data), parent(0) {}; 1183 Tree(const Tree<D>& tree, Tree* p) : data(tree.data), parent(p) {}; 1184 1185 Tree<D>& operator=(const Tree<D>& other) { 1186 if (this != &other) { 1187 this->data = other.data; 1188 this->parent = other.parent; 1189 this->children = other.children; 1190 } 1191 return *this; 1192 }; 1193 1194 void insertChild(TreeCoords<D> d) { 1195 Tree* c = new Tree(d, this); 1196 children.insert(children.begin(),SharedPtr<Tree<D> > (c)); 1197 }; 1198 1199 void insertChild(const Tree<D>& tree) { 1200 Tree* c = new Tree(tree, this); 1201 children.insert(children.begin(),SharedPtr<Tree<D> > (c)); 1202 }; 1203 1204 void print() { 1205 data.print(); 1206 int csize = children.size(); 1207 for (int j = 0; j < csize; j++) { 1208 children[j]->print(); 1209 } 1210 }; 1211 1212 bool isForeparentOf(Key<D> key) const { 1213 return (this->data.key.is_parent_of(key)); 1214 }; 1215 1216 void findOwner(const Key<D> key, ProcessID *ow) const { 1217//madness::print("findOwner: at node", this->data.key); 1218 if (this->isForeparentOf(key)) { 1219//madness::print("findOwner: node", this->data.key, "is foreparent of", key, "so owner =", this->data.owner); 1220 *ow = this->data.owner; 1221 if (this->data.key.level() < key.level()) { 1222 int csize = children.size(); 1223 for (int j = 0; j < csize; j++) { 1224//madness::print("findOwner: recursively call on ", this->children[j]->data.key); 1225 children[j]->findOwner(key, ow); 1226 } 1227 } 1228 } 1229 }; 1230 1231 bool fill(TreeCoords<D> node) { 1232 bool success = false; 1233 if (this->isForeparentOf(node.key)) { 1234 int csize = children.size(); 1235 for (int i = 0; i < csize; i++) { 1236 if (children[i]->isForeparentOf(node.key)) { 1237 success = children[i]->fill(node); 1238 } 1239 } 1240 if (!success) { 1241 this->insertChild(node); 1242 success = true; 1243 } 1244 } 1245 return success; 1246 } 1247 }; 1248 1249 1250 1251 template <int D> 1252 class MyPmap : public WorldDCPmapInterface< Key<D> > { 1253 private: 1254 bool static_map; 1255 const ProcessID staticmap_owner; 1256 Tree<D>* treeMap; 1257 typedef Key<D> KeyD; 1258 1259 void buildTreeMap(vector<TreeCoords<D> > v) { 1260 sort(v.begin(), v.end()); 1261 int vlen = v.size(); 1262 1263 if (vlen == 0) throw "empty map!!!"; 1264 1265<<<<<<< .mine 1266 treeMap = new Tree<D>(v[vlen-1]); 1267 for (int j = vlen-2; j >= 0; j--) { 1268 treeMap->fill(v[j]); 1269 } 1270 }; 1271======= 1272 ProcessID getOwner(const KeyD& key) const { 1273 ProcessID owner; 1274 treeMap->findOwner(key, &owner); 1275 return owner; 1276 }; 1277>>>>>>> .r223 1278 1279 1280 public: 1281 MyPmap() : staticmap(false), staticmap_owner(0) {}; 1282 1283 MyPmap(World& world) : staticmap(false), staticmap_owner(0) { 1284 int NP = world.nproc(); 1285 int twotoD = power<D>(); 1286 const int level = nearest_power(NP, twotoD); 1287 int NPin = (int) pow((double)twotoD,level); 1288 vector<TreeCoords<D> > v; 1289 1290<<<<<<< .mine 1291 for (Translation i=0; i < (Translation)NPin; i++) { 1292 KeyD key(level,i); 1293 if ((i%twotoD) == 0) { 1294 key = key.parent(nearest_power(NPin-i, twotoD)); 1295 } 1296 v.push_back(TreeCoords<D>(key,i)); 1297 } 1298 buildTreeMap(v); 1299 madness::print("MyPmap constructor"); 1300 treeMap->print(); 1301 }; 1302======= 1303 treeMap = new Tree<D>(v[vlen-1]); 1304 for (int j = vlen-2; j >= 0; j--) { 1305 treeMap->fill(v[j]); 1306 } 1307 }; 1308 1309>>>>>>> .r223 1310 1311 MyPmap(World& world, ProcessID owner) : staticmap(true), owner(owner) {}; 1312 1313 MyPmap(World& world, vector<TreeCoords<D> > v) : staticmap(false), owner(1) { // owner???????????????? 1314 buildTreeMap(v); 1315 madness::print(""); 1316 treeMap->print(); 1317 }; 1318 1319 MyPmap(const MyPmap<D>& other) : staticmap(other.staticmap), owner(other.owner), treeMap(other.treeMap) {}; 1320 1321 MyPmap<D>& operator=(const MyPmap<D>& other) { 1322 if (this != &other) { 1323 staticmap = other.staticmap; 1324 owner = other.owner; 1325 treeMap = other.treeMap; 1326 } 1327 return *this; 1328 }; 1329 1330 void print() { 1331 treeMap->print(); 1332 }; 1333 1334 ProcessID Owner(const KeyD& key) const { 1335 if (staticmap) 1336 return staticmap_owner; 1337 else { 1338 ProcessID owner; 1339 treeMap->findOwner(key, &owner); 1340 return owner; 1341 } 1342 }; 1343 }; 1344 1345 template <int D> 1346 class LBTree : public WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD> { 1347 // No new variables necessary 1348 public: 1349<<<<<<< .mine 1350 typedef WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD> dcT; 1351 LBTree() {}; 1352 LBTree(World& world, const SharedPtr< WorldDCPmapInterface<DClass<D>::KeyD> >& pmap) : dcT(world,pmap) { 1353 madness::print("LBTree(world, pmap) constructor"); 1354 this->get_mypmap()->print(); 1355 madness::print("LBTree(world, pmap) constructor (goodbye)"); 1356 }; 1357 template <typename T> 1358 inline void init_tree(SharedPtr< FunctionImpl<T,D> > f, typename DClass<D>::KeyDConst key) { 1359 // find Node associated with key 1360 typename FunctionImpl<T,D>::dcT::iterator it = f->coeffs.find(key); 1361 if (it == f->coeffs.end()) return; 1362 // convert Node to LBNode 1363 NodeData nd; 1364 if (!(it->second.has_children())) { 1365 typename DClass<D>::NodeD lbnode(nd,false); 1366 // insert into "this" 1367 this->insert(key, lbnode); 1368 } else { 1369 typename DClass<D>::NodeD lbnode(nd,true); 1370 // insert into "this" 1371 this->insert(key, lbnode); 1372 // then, call for each child 1373 for (KeyChildIterator<D> kit(key); kit; ++kit) { 1374 this->init_tree<T>(f, kit.key()); 1375 } 1376 } 1377 }; 1378======= 1379 typedef WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD, Pmap> dcT; 1380 1381 LBTree() {}; 1382 LBTree(World& world, const Pmap& pmap) : dcT(world,pmap) { 1383 this->get_procmap().print(); 1384 }; 1385 1386 template <typename T> 1387 inline void init_tree(SharedPtr<FunctionImpl<T,D,Pmap> > f, typename DClass<D>::KeyDConst key) { 1388 // find Node associated with key 1389 typename FunctionImpl<T,D,Pmap>::iterator it = f->find(key); 1390 if (it == f->end()) return; 1391 // convert Node to LBNode 1392 NodeData nd; 1393 if (!(it->second.has_children())) { 1394 typename DClass<D>::NodeD lbnode(nd,false); 1395 // insert into "this" 1396 this->insert(key, lbnode); 1397 } 1398 else { 1399 typename DClass<D>::NodeD lbnode(nd,true); 1400 // insert into "this" 1401 this->insert(key, lbnode); 1402 // then, call for each child 1403 for (KeyChildIterator<D> kit(key); kit; ++kit) { 1404 this->init_tree<T>(f, kit.key()); 1405 } 1406 } 1407 }; 1408>>>>>>> .r223 1409 1410 // Methods: 1411 void print(typename DClass<D>::KeyDConst& key) { 1412 typename DClass<D>::treeT::iterator it = this->find(key); 1413 if (it == this->end()) return; 1414 for (Level i = 0; i < key.level(); i++) cout << " "; 1415 madness::print(key, it->second); 1416 for (KeyChildIterator<D> kit(key); kit; ++kit) { 1417 print(kit.key()); 1418 } 1419 }; 1420 1421 Cost fixCost(typename DClass<D>::KeyDConst& key); 1422 1423 Cost depthFirstPartition(typename DClass<D>::KeyDConst& key, 1424 vector<typename DClass<D>::TreeCoords>* klist, unsigned int npieces, 1425 Cost totalcost = 0, Cost *maxcost = 0); 1426 1427 void rollup(typename DClass<D>::KeyDConst& key); 1428 1429 void meld(typename DClass<D>::KeyDConst& key); 1430 1431 Cost makePartition(typename DClass<D>::KeyDConst& key, 1432 vector<typename DClass<D>::KeyD>* klist, Cost partitionSize, 1433 bool lastPartition, Cost usedUp, bool *atleaf); 1434 1435 void removeCost(typename DClass<D>::KeyDConst& key, Cost c); 1436 1437 Cost computeCost(typename DClass<D>::KeyDConst& key); 1438 1439 // inherited methods 1440 typename WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD>::iterator 1441 end() { 1442 return WorldContainer<typename DClass<D>::KeyD, typename DClass<D>::NodeD>::end(); 1443 }; 1444 1445 typename WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD>::iterator 1446 find(typename DClass<D>::KeyDConst& key) { 1447 return WorldContainer<typename DClass<D>::KeyD, typename DClass<D>::NodeD>::find(key); 1448 }; 1449 1450// const SharedPtr<WorldDCPmapInterface< DClass<D>::KeyD >& get_pmap() { 1451// return WorldContainer<typename DClass<D>::KeyD, typename DClass<D>::NodeD>::get_pmap(); 1452// }; 1453 1454 MyPmap<D>& get_mypmap() { 1455 return *static_cast< MyPmap<D>* >(get_pmap().get()); 1456 }; 1457 1458 }; 1459 1460 template <typename T, int D, typename Pmap=MyPmap<D> > 1461 class LoadBalImpl { 1462 private: 1463 Function<T,D,Pmap> f; 1464 SharedPtr<typename DClass<D>::treeT> skeltree; 1465 1466<<<<<<< .mine 1467 void construct_skel(SharedPtr<FunctionImpl<T,D,Pmap> > f) { 1468 skeltree = SharedPtr<typename DClass<D>::treeT>(new typename DClass<D>::treeT(f->world, 1469 f->coeffs.get_mypmap())); 1470 typename DClass<D>::KeyD root(0); 1471 madness::print("about to initialize tree"); 1472 if (f->world.mpi.rank() == 0) { 1473 skeltree->template init_tree<T>(f,root); 1474 } 1475 madness::print("just initialized tree"); 1476 }; 1477======= 1478 void construct_skel(SharedPtr<FunctionImpl<T,D,Pmap> > f) { 1479 skeltree = SharedPtr<typename DClass<D>::treeT>(new typename DClass<D>::treeT(f->world, 1480 f->get_procmap())); 1481 typename DClass<D>::KeyD root(0); 1482 if (f->world.mpi.rank() == 0) { 1483 skeltree->template init_tree<T>(f,root); 1484 } 1485 }; 1486>>>>>>> .r223 1487 1488 public: 1489<<<<<<< .mine 1490 //Constructors 1491 LoadBalImpl() {}; 1492======= 1493 //Constructors 1494 LoadBalImpl() {}; 1495 LoadBalImpl(Function<T,D,Pmap> f) : f(f) { 1496 construct_skel(f.impl); 1497 }; 1498 ~LoadBalImpl() {}; 1499>>>>>>> .r223 1500 1501 LoadBalImpl(Function<T,D> f) : f(f) { 1502 madness::print("LoadBalImpl (Function) constructor: f.impl", &f.impl); 1503 construct_skel(f.impl); 1504 }; 1505 1506 ~LoadBalImpl() {}; 1507 1508<<<<<<< .mine 1509 //Methods 1510 inline void loadBalance() { 1511 partition(findBestPartition()); 1512 }; 1513======= 1514 void partition(vector<typename DClass<D>::TreeCoords> v) { 1515 // implement partition: copy to new FunctionImpl and replace within f 1516 Pmap pmap(f.impl->world, v); 1517 SharedPtr<FunctionImpl<T,D,Pmap> > newimpl(new FunctionImpl<T,D,Pmap>(*(f.impl.get()),pmap)); 1518 if (f.impl->world.mpi.rank() == 0) { 1519 madness::migrate<T,D,Pmap>(f.impl, newimpl); 1520 Key<D> root(0); 1521 newimpl->print(root); 1522 } 1523 f.impl->world.gop.fence(); 1524 f.impl = newimpl; 1525 }; 1526>>>>>>> .r223 1527 1528 vector<typename DClass<D>::TreeCoords> findBestPartition(); 1529 1530 void partition(vector<typename DClass<D>::TreeCoords> v) { 1531 // implement partition: copy to new FunctionImpl and replace within f 1532 madness::print("partition: at beginning"); 1533 Pmap pmap(f.impl->world, v); 1534 SharedPtr<FunctionImpl<T,D,Pmap> > newimpl(new FunctionImpl<T,D>(*(f.impl.get()),pmap)); // ??????????????????????????????????? 1535 if (f.impl->world.mpi.rank() == 0) { 1536 madness::migrate<T,D,Pmap>(f.impl, newimpl); 1537 } 1538 madness::print("partition: at fence"); 1539 f.impl->world.gop.fence(); 1540 madness::print("partition: after fence"); 1541 f.impl = newimpl; 1542 }; 1543 1544 }; 1545 1546 CompCost computeCompCost(Cost c, int n); 1547 1548 Cost computePartitionSize(Cost cost, unsigned int parts); 1549 1550} 1551 1552#endif 1553 1554 1555 /// Simple distributed map for the tree 1556 template <int NDIM> 1557 class FunctionSimplePmap<NDIM> : public WorldDCPmapInterface< Key<NDIM> > { 1558 private: 1559 World* world; 1560 Level n; 1561 1562 public: 1563 FunctionSimplePmap() : world(0), n(2) {}; 1564 1565 FunctionSimplePmap( 1566 1567 1568 1569 1570 1571 1572// Who knows why these aren't cooperating, so commented out for now 1573 /* 1574 template void migrate_data<double,1>(SharedPtr<FunctionImpl<double,1> > tfrom, 1575 SharedPtr<FunctionImpl<double,1> > tto, DClass<1>::KeyD key); 1576 template void migrate_data<double,2>(SharedPtr<FunctionImpl<double,2> > tfrom, 1577 SharedPtr<FunctionImpl<double,2> > tto, DClass<2>::KeyD key); 1578 template void migrate_data<double,3>(SharedPtr<FunctionImpl<double,3> > tfrom, 1579 SharedPtr<FunctionImpl<double,3> > tto, DClass<3>::KeyD key); 1580 template void migrate_data<double,4>(SharedPtr<FunctionImpl<double,4> > tfrom, 1581 SharedPtr<FunctionImpl<double,4> > tto, DClass<4>::KeyD key); 1582 template void migrate_data<double,5>(SharedPtr<FunctionImpl<double,5> > tfrom, 1583 SharedPtr<FunctionImpl<double,5> > tto, DClass<5>::KeyD key); 1584 template void migrate_data<double,6>(SharedPtr<FunctionImpl<double,6> > tfrom, 1585 SharedPtr<FunctionImpl<double,6> > tto, DClass<6>::KeyD key); 1586 1587 template void migrate_data<std::complex<double>,1>(SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tfrom, 1588 SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tto, DClass<1>::KeyD key); 1589 template void migrate_data<std::complex<double>,2>(SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tfrom, 1590 SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tto, DClass<2>::KeyD key); 1591 template void migrate_data<std::complex<double>,3>(SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tfrom, 1592 SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tto, DClass<3>::KeyD key); 1593 template void migrate_data<std::complex<double>,4>(SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tfrom, 1594 SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tto, DClass<4>::KeyD key); 1595 template void migrate_data<std::complex<double>,5>(SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tfrom, 1596 SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tto, DClass<5>::KeyD key); 1597 template void migrate_data<std::complex<double>,6>(SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tfrom, 1598 SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tto, DClass<6>::KeyD key); 1599 1600 template void migrate<double,1,MyPmap<1> >(SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tfrom, 1601 SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tto); 1602 template void migrate<double,2,MyPmap<2> >(SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tfrom, 1603 SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tto); 1604 template void migrate<double,3,MyPmap<3> >(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom, 1605 SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto); 1606 template void migrate<double,4,MyPmap<4> >(SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tfrom, 1607 SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tto); 1608 template void migrate<double,5,MyPmap<5> >(SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tfrom, 1609 SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tto); 1610 template void migrate<double,6,MyPmap<6> >(SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tfrom, 1611 SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tto); 1612 1613 template void migrate<std::complex<double>,1,MyPmap<1> >(SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > tfrom, 1614 SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tto); 1615 template void migrate<std::complex<double>,2,MyPmap<2> >(SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tfrom, 1616 SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tto); 1617 template void migrate<std::complex<double>,3,MyPmap<3> >(SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tfrom, 1618 SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tto); 1619 template void migrate<std::complex<double>,4,MyPmap<4> >(SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tfrom, 1620 SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tto); 1621 template void migrate<std::complex<double>,5,MyPmap<5> >(SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tfrom, 1622 SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tto); 1623 template void migrate<std::complex<double>,6,MyPmap<6> >(SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tfrom, 1624 SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tto); 1625 */ 1626 1627 1628 1629 1630 1631 1632template <typename T, int NDIM> 1633struct TestOp : WorldObject< TestOp<T,NDIM> > { 1634 typedef T resultT; 1635 typedef TestOp<T,NDIM> opT; 1636 const int k; 1637 std::vector<long> v2k; 1638 1639 TestOp(World& world, int k) : WorldObject<opT>(world), k(k), v2k(NDIM) { 1640 this->process_pending(); 1641 for (int i=0; i<NDIM; i++) v2k[i] = 2*k; 1642 }; 1643 1644 1645 double norm(const Key<NDIM>& key, const Displacement<NDIM>& d) const { 1646 if (d.distsq > 2) return 0.0; 1647 else return 1.0; 1648 } 1649 1650 Tensor<T> apply(const Key<NDIM>& key, const Displacement<NDIM>& d, const Tensor<T>& c) const { 1651 print("applying ", key, d); 1652 return Tensor<resultT>(v2k); 1653 } 1654}; 1655 1656 1657 1658 1659namespace madness { 1660 namespace archive { 1661 template <class Archive, class T, int NDIM> 1662 struct ArchiveLoadImpl<Archive,const TestOp<T,NDIM>*> { 1663 static inline void load(const Archive& ar, const TestOp<T,NDIM>*& ptr) { 1664 WorldObject< TestOp<T,NDIM> >* p; 1665 ar & p; 1666 ptr = static_cast< const TestOp<T,NDIM>* >(p); 1667 } 1668 }; 1669 1670 template <class Archive, class T, int NDIM> 1671 struct ArchiveStoreImpl<Archive,const TestOp<T,NDIM>*> { 1672 static inline void store(const Archive& ar, const TestOp<T,NDIM>*const& ptr) { 1673 ar & static_cast< const WorldObject< TestOp<T,NDIM> >* > (ptr); 1674 } 1675 }; 1676 } 1677} 1678 1679 1680 1681 /// Holds info about displacement to neighbor for application of operators 1682 template <int NDIM> 1683 struct Displacement { 1684 Vector<Translation,NDIM> d; 1685 uint64_t distsq; 1686 Displacement() {}; 1687 Displacement(const Vector<int, NDIM>& d) : d(d), distsq(0) { 1688 for (int i=0; i<NDIM; i++) distsq += d[i]*d[i]; 1689 } 1690 1691 bool operator<(const Displacement<NDIM>& other) const { 1692 return distsq < other.distsq; 1693 } 1694 1695 Translation operator[](int i) const {return d[i];} 1696 1697 template <typename Archive> 1698 void serialize(Archive& ar) { 1699 ar & d & distsq; 1700 } 1701 }; 1702 1703 template <int NDIM> 1704 std::ostream& operator<<(std::ostream& s, const Displacement<NDIM>& disp) { 1705 s << disp.d; 1706 return s; 1707 } 1708 1709 1710 1711 /// Simplified interface around hash_map to cache stuff 1712 1713 /// Since insertions into STL containers have the nasty habit of 1714 /// invalidating iterators we actually store shared pointers 1715 template <typename Q> 1716 class SimpleCache { 1717 private: 1718 typedef HASH_MAP_NAMESPACE::hash_map< unsigned long, SharedPtr<Q> > mapT; 1719 typedef std::pair< unsigned long, SharedPtr<Q> > pairT; 1720 mapT cache; 1721 1722 // Turns (n,lx) into key 1723 inline unsigned long key(Level n, long lx) const { 1724 return (n<<25) | (lx+16777216); 1725 } 1726 1727 // Turns (n,displacement) into key 1728 template <int NDIM> 1729 inline unsigned long key(Level n, const Key<NDIM>& d) const { 1730 MADNESS_ASSERT((6+NDIM*4) <= sizeof(unsigned long)*8); 1731 unsigned long k = n<<2; 1732 for (int i=0; i<NDIM; i++) k = (k<<4) | (d.translation()[i]+7); 1733 return k; 1734 } 1735 1736 public: 1737 SimpleCache() : cache() {}; 1738 1739 SimpleCache(const SimpleCache& c) : cache(c.cache) {}; 1740 SimpleCache& operator=(const SimpleCache& c) { 1741 if (this != &c) { 1742 cache.clear(); 1743 cache = c.cache; 1744 } 1745 return *this; 1746 } 1747 1748 /// If (n,index) is present return pointer to cached value, otherwise return NULL 1749 template <typename indexT> 1750 inline const Q* getptr(Level n, const indexT& index) const { 1751 typename mapT::const_iterator test = cache.find(key(n,index)); 1752 if (test == cache.end()) return 0; 1753 return test->second.get(); 1754 } 1755 1756 1757 /// Set value associated with (n,index) 1758 template <typename indexT> 1759 inline void set(Level n, const indexT& index, const Q& val) { 1760 cache.insert(pairT(key(n,index),SharedPtr<Q>(new Q(val)))); 1761 } 1762 }; 1763 1764 1765 1766 1767 1768# You must define additional rules to link your test programs 1769test: $(TEST1OBJ) $(THISLIBDEPEND) 1770 $(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS) 1771 1772testqm: $(TEST2OBJ) $(THISLIBDEPEND) 1773 $(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS) 1774 1775jjtests: jjtests.o $(THISLIBDEPEND) 1776 $(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS) 1777 1778jjtests.o: sdc.h 1779 1780testsuite: $(TEST3OBJ) $(THISLIBDEPEND) 1781 $(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS) 1782 1783tests-hqi: $(TEST4OBJ) $(THISLIBDEPEND) 1784 $(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS) 1785 1786testperiodic: $(TEST5OBJ) $(THISLIBDEPEND) 1787 $(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS) 1788 1789gfit: $(TEST6OBJ) $(THISLIBDEPEND) 1790 $(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS) 1791 1792testbsh: $(TEST6OBJ) $(THISLIBDEPEND) 1793 $(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS) 1794 1795runvalg2: 1796 mpiexec -np 3 $(VALGRIND) $(VALGOPTS) ./tests-hqi -rio 1797 1798 1799 1800 1801 template <typename T, int NDIM> 1802 Void FunctionImpl<T,NDIM>::ensure_exists(const keyT& key) { 1803 PROFILE_MEMBER_FUNC(FunctionImpl); 1804 if (!coeffs.probe(key)) { 1805 keyT parent = key.parent(); 1806 // If the node does not exist this implies that it will 1807 // be a leaf ... make it here so that we only send one 1808 // request up the tree to make it. 1809 coeffs.insert(key,nodeT(tensorT(),false)); 1810 //madness::print("ensure_exists: sending recur up from", key, "to", parent); 1811 send(coeffs.owner(parent), &implT::recur_down_with_fill, key, parent); 1812 } 1813 return None; 1814 } 1815 1816 1817 template <typename T, int NDIM> 1818 void FunctionImpl<T,NDIM>::widen(bool fence, int ndiff) { 1819 PROFILE_MEMBER_FUNC(FunctionImpl); 1820 double tol = std::min(1e3*thresh, sqrt(thresh)); 1821 for(typename dcT::iterator it=coeffs.begin(); it!=coeffs.end(); ++it) { 1822 const keyT& key = it->first; 1823 const nodeT& node = it->second; 1824 if (node.is_leaf() && node.coeff().normf()>tol) { 1825 for (int axis=0; axis<NDIM; axis++) { 1826 for (int step=-1; step<=1; step+=2) { 1827 keyT neigh = neighbor(key, axis, step); 1828 if (neigh.is_valid()) { 1829 if (ndiff > 0) neigh = neigh.parent(ndiff); 1830 send(coeffs.owner(neigh), &implT::ensure_exists, neigh); 1831 } 1832 } 1833 } 1834 } 1835 } 1836 if (fence) world.gop.fence(); 1837 } 1838 1839 1840 // Widens the support of the tree in preparation for integral operator 1841 void widen(bool fence, int ndiff); 1842 1843 1844 void widen(bool fence = true, int ndiff = 1) { 1845 PROFILE_MEMBER_FUNC(Function); 1846 verify(); 1847 if (is_compressed()) reconstruct(); 1848 impl->widen(fence, ndiff); 1849 if (fence && VERIFY_TREE) verify_tree(); 1850 } 1851 1852 1853 1854 1855 template <typename testT> 1856 void conditional_refine_doit(const testT& test, const keyT& key) { 1857 nodeT& node = coeffs[key]; 1858 if (node.has_coeff() && test(key, node.coeff())) { 1859 tensorT s(cdata.v2k); 1860 s(cdata.s0) = node.coeff(); 1861 s = unfilter(s); 1862 node.clear_coeff(); 1863 node.set_has_children(true); 1864 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 1865 const keyT& child = kit.key(); 1866 task(coeffs.owner(child), &implT:: template conditional_refine_insert_doit<testT>, 1867 test, child, copy(s(child_patch(child)))); 1868 } 1869 } 1870 } 1871 1872 template <typename testT> 1873 void conditional_refine(const testT& test, bool fence) { 1874 MADNESS_ASSERT(!compressed); 1875 for(typename dcT::iterator it=coeffs.begin(); it!=coeffs.end(); ++it) { 1876 const keyT& key = it->first; 1877 conditional_refine_doit(test, key); 1878 } 1879 if (fence) world.gop.fence(); 1880 } 1881 1882 1883 1884 template <typename testT> 1885 void conditional_refine(const testT& test, bool fence=true) { 1886 if (is_compressed()) reconstruct(); 1887 impl->conditional_refine(test, fence); 1888 } 1889 1890 1891 1892 1893 template <typename opT, typename R> 1894 Void do_apply_acc(const opT* op, const FunctionImpl<R,NDIM>* f, const keyT& key, const Tensor<T>& t) { 1895 PROFILE_MEMBER_FUNC(FunctionImpl); 1896 if (!coeffs.probe(key)) coeffs.replace(key, nodeT()); 1897 nodeT& node = coeffs[key]; 1898 1899 // Accumulate into the box 1900 if (node.has_coeff()) { 1901 node.coeff().gaxpy(1.0,t,1.0); 1902 } 1903 else { 1904 node.set_coeff(copy(t)); 1905 // No existing coeff and no children means the node is newly created for 1906 // this operation and we must tell its parent that it exists. 1907 if ((!node.has_children()) && (key.level() > 0)) { 1908 Key<NDIM> parent = key.parent(); 1909 coeffs.send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, coeffs, parent); 1910 } 1911 1912 if (op->dowiden1) { 1913 typename FunctionImpl<R,NDIM>::dcT::const_iterator it = f->coeffs.find(key); 1914 if ((it==f->coeffs.end() || it->second.is_invalid()) && 1915 (t.normf() > truncate_tol(thresh,key))) { 1916 // We just made the first contribution to box that did not 1917 // exist in the source. Make the source box with any 1918 // missing parents and apply the operator to each of them. 1919 recur_down_with_apply(op, f, key.parent(), key, Tensor<R>()); 1920 } 1921 } 1922 1923 } 1924 return None; 1925 } 1926 1927 1928 1929 // This routine MUST be executed in an AM handler for atomicity 1930 template <typename opT, typename R> 1931 Void recur_down_with_apply(const opT* op, 1932 const FunctionImpl<R,NDIM>* cf, 1933 const keyT& key, 1934 const keyT& target, 1935 const Tensor<R>& r) { 1936 1937 PROFILE_MEMBER_FUNC(FunctionImpl); 1938 // We send the coeffs down in this routine so we have effectively 1939 // atomic insert+apply to eliminate a race condition leading to 1940 // double application of the operator. 1941 1942 FunctionImpl<R,NDIM>* f = const_cast<FunctionImpl<R,NDIM>*>(cf); 1943 1944 // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< race condition?????? 1945 1946 if (!f->coeffs.probe(key)) { 1947 //madness::print("not there", key); 1948 f->coeffs.replace(key,FunctionNode<R,NDIM>()); 1949 } 1950 FunctionNode<R,NDIM>& node = f->coeffs[key]; 1951 1952 if (r.size) { 1953 // If r has data then it will be the coefficients for this node. 1954 // If we don't already have coeffs courtesy of someone else then 1955 // insert them and apply the operator. 1956 if (!node.has_coeff()) { 1957 MADNESS_ASSERT(r.iscontiguous()); 1958 node.set_coeff(r); 1959 //madness::print("EXTENDED APPLY", key, node.coeff().normf()); 1960 task(world.rank(), &implT:: template do_apply<opT,R>, op, cf, key, node.coeff()); 1961 if (key.level() == target.level()) return None; // Mission accomplished! 1962 if (!target.is_child_of(key)) return None; // This is a sibling of the correct path 1963 } 1964 } 1965 1966 // If r does not have data or we are not yet at our target then we 1967 // must look at the node to see what to do 1968 1969 // - If key==target 1970 // The coeffs should already have been made (and the operator applied) 1971 // while someone else was making another node. 1972 // 1973 // - Otherwise 1974 // a) Node does not exist ... forward up. Accessing the node in the manner 1975 // above would have made an invalid node ... so this is captured by b) 1976 // b) Node exists but is invalid ... forward up (this means that someone else 1977 // is already trying to make this node ... better would be to attach 1978 // a callback so that when the coeffs are set this task is initiated). 1979 // c) Node exists and has children ... forward down 1980 // d) Node exists and has no children ... recur down 1981 1982 Tensor<R> empty; 1983 1984 if (node.has_coeff()) { // d ... recur down if appropriate 1985 if (key.level() < target.level() && target.is_child_of(key)) { 1986 const Tensor<R>& r = node.coeff(); 1987 Tensor<R> s; 1988 if (r.dim[0] == k) { 1989 Tensor<R> d(cdata.v2k); 1990 d(cdata.s0) = node.coeff()(cdata.s0); 1991 s = unfilter(d); 1992 } 1993 else if (r.dim[0] == 2*k) { 1994 s = unfilter(node.coeff()); 1995 } 1996 else { 1997 MADNESS_EXCEPTION("Uh?",r.dim[0]); 1998 } 1999 2000 node.set_has_children(true); 2001 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2002 const keyT& child = kit.key(); 2003 Tensor<R> ss = copy(s(child_patch(child))); 2004 //madness::print("EXTENDED DOWN-2", key, "to", child, ss.normf()); 2005 task(coeffs.owner(child), &implT:: template recur_down_with_apply<opT,R>, 2006 op, cf, child, target, ss); 2007 } 2008 } 2009 } 2010 else { // a and b ... forward up 2011 keyT parent = key.parent(); 2012 //madness::print("EXTENDED UP", key, "to", parent); 2013 task(coeffs.owner(parent), &implT:: template recur_down_with_apply<opT,R>, op, cf, parent, target, empty); 2014 } 2015 2016 return None; 2017 } 2018 2019 2020 2021 2022 2023 2024 2025 /// Apply one of the separated terms, accumulating into the result 2026 2027 /// !!! Keep this routine exactly consistent with muopxvT so that 2028 /// munorm converges correctly 2029 template <typename T> 2030 void muopxv(Level n, 2031 const ConvolutionData1D<Q>* const ops[NDIM], 2032 const Tensor<T>& f, const Tensor<T>& f0, 2033 Tensor<TENSOR_RESULT_TYPE(T,Q)>& result, 2034 const double tol, 2035 const double musign) const { 2036 PROFILE_MEMBER_FUNC(SeparatedConvolution); 2037 2038 double Rnorm = 1.0; 2039 for (int d=0; d<NDIM; d++) Rnorm *= ops[d]->Rnorm; 2040 if (Rnorm == 0.0) return; 2041 2042 // Temporaries can be optimized away 2043 Tensor<TENSOR_RESULT_TYPE(T,Q)> tmp = inner(f,ops[0]->R,0,0); 2044 for (int d=1; d<NDIM; d++) { 2045 tmp = inner(tmp,ops[d]->R,0,0); 2046 } 2047 result.gaxpy(1.0,tmp,musign); 2048 2049 if (n > 0) { 2050 tmp = inner(f0,ops[0]->T,0,0); 2051 for (int d=1; d<NDIM; d++) { 2052 tmp = inner(tmp,ops[d]->T,0,0); 2053 } 2054 result(s0).gaxpy(1.0,tmp,-musign); 2055 } 2056 } 2057 2058 /// Apply transpose of one of the separated terms, accumulating into the result 2059 2060 /// This is only used when computing the actual 2-norm by the power method 2061 /// !!! Keep this routine exactly consistent with muopxv so that 2062 /// munorm converges correctly 2063 template <typename T> 2064 void muopxvT(Level n, 2065 const ConvolutionData1D<Q>* ops[], 2066 const Tensor<T>& f, const Tensor<T>& f0, 2067 Tensor<TENSOR_RESULT_TYPE(T,Q)>& result, 2068 const double tol, 2069 const double musign) const { 2070 PROFILE_MEMBER_FUNC(SeparatedConvolution); 2071 2072 double Rnorm = 1.0; 2073 for (int d=0; d<NDIM; d++) Rnorm *= ops[d]->Rnorm; 2074 if (Rnorm == 0.0) return; 2075 2076 // Temporaries can be optimized away 2077 Tensor<TENSOR_RESULT_TYPE(T,Q)> tmp = inner(f,ops[0]->R,0,1); 2078 for (int d=1; d<NDIM; d++) { 2079 tmp = inner(tmp,ops[d]->R,0,1); 2080 } 2081 result.gaxpy(1.0,tmp,musign); 2082 2083 if (n > 0) { 2084 tmp = inner(f0,ops[0]->T,0,1); // Slice+copy can be optimized away 2085 for (int d=1; d<NDIM; d++) { 2086 tmp = inner(tmp,ops[d]->T,0,1); 2087 } 2088 result(s0).gaxpy(1.0,tmp,-musign); 2089 } 2090 } 2091 2092 2093 /// Computes the 2-norm of one of the separated terms 2094 double munorm(Level n, const ConvolutionData1D<Q>* ops[]) const { 2095 PROFILE_MEMBER_FUNC(SeparatedConvolution); 2096 Tensor<Q> f(v2k), f0, ff(v2k); 2097 2098 double tol = 1e-20; 2099 2100 f.fillrandom(); 2101 f.scale(1.0/f.normf()); 2102 double evalp = 1.0, eval, ratio=99.0; 2103 for (int iter=0; iter<100; iter++) { 2104 ff.fill(0.0); 2105 f0 = copy(f(s0)); 2106 muopxv(n,ops,f,f0,ff,tol,1.0); 2107 f.fill(0.0); 2108 f0 = copy(ff(s0)); 2109 muopxvT(n,ops,ff,f0,f,tol,1.0); 2110 2111 eval = f.normf(); 2112 if (eval == 0.0) break; 2113 f.scale(1.0/eval); 2114 eval = sqrt(eval); 2115 ratio = eval/evalp; 2116 //std::printf("munorm: %d %10.2e %10.2e %10.2e \n", iter, eval, evalp, ratio); 2117 if (iter>0 && ratio<1.2 && ratio>0.9999) break; // 1.2 was 1.02; >0.9999 was >=1.0 2118 if (iter>10 && eval<tol) break; 2119 evalp = eval; 2120 if (iter == 99) throw "munorm failed"; 2121 } 2122 return eval*ratio; 2123 } 2124 2125 2126 2127 2128 2129 2130 /// Invoked by result to compute the pointwise product result=left*right 2131 2132 /// This version requires all three functions have the same distribution. 2133 /// Should be straightforward to do an efficient version that does not 2134 /// require this but I have not thought about that yet. 2135 /// 2136 /// Possible non-blocking communication and optional fence. 2137 template <typename L, typename R> 2138 void mul(const FunctionImpl<L,NDIM>& left, const FunctionImpl<R,NDIM>& right, bool fence) { 2139 PROFILE_MEMBER_FUNC(FunctionImpl); 2140 typedef std::pair< keyT,Tensor<R> > rpairT; 2141 typedef std::pair< keyT,Tensor<L> > lpairT; 2142 MADNESS_ASSERT(coeffs.get_pmap() == left.coeffs.get_pmap() && \ 2143 coeffs.get_pmap() == right.coeffs.get_pmap()); 2144 // The three possibilities for the relative position of 2145 // the left and right coefficients in the tree are: 2146 // 2147 // 1. left==right 2148 // 2. left>right 2149 // 3. left<right 2150 // 2151 // First loop thru local coeff in left. Handle right at the same level or above. 2152 for (typename FunctionImpl<L,NDIM>::dcT::const_iterator it=left.coeffs.begin(); 2153 it != left.coeffs.end(); 2154 ++it) { 2155 const keyT& key = it->first; 2156 const FunctionNode<L,NDIM>& left_node = it->second; 2157 2158 if (left_node.has_coeff()) { 2159 if (right.coeffs.probe(key)) { 2160 const FunctionNode<R,NDIM>& right_node = right.coeffs.find(key).get()->second; 2161 if (right_node.has_coeff()) { 2162 task(world.rank(), &implT:: template do_mul<L,R>, key, left_node.coeff(), 2163 rpairT(key,right_node.coeff())); // Case 1. 2164 } 2165 } 2166 else { // If right node does not exist then it must be further up the tree 2167 const keyT parent = key.parent(); 2168 Future<rpairT> arg; 2169 right.task(coeffs.owner(parent), &FunctionImpl<R,NDIM>::sock_it_to_me, 2170 parent, arg.remote_ref(world), TaskAttributes::hipri()); 2171 task(world.rank(), &implT:: template do_mul<L,R>, key, left_node.coeff(), arg); // Case 2. 2172 } 2173 } 2174 else if (!coeffs.probe(key)) { 2175 // Interior node 2176 coeffs.replace(key,nodeT(tensorT(),true)); 2177 } 2178 2179 } 2180 2181 // Now loop thru local coeff in right and do case 3. 2182 for (typename FunctionImpl<R,NDIM>::dcT::const_iterator it=right.coeffs.begin(); 2183 it != right.coeffs.end(); 2184 ++it) { 2185 const keyT& key = it->first; 2186 const FunctionNode<R,NDIM>& right_node = it->second; 2187 if (right_node.has_coeff()) { 2188 if (!left.coeffs.probe(key)) { 2189 Future<lpairT> arg; 2190 const keyT& parent = key.parent(); 2191 left.task(coeffs.owner(parent), &FunctionImpl<L,NDIM>::sock_it_to_me, 2192 parent, arg.remote_ref(world), TaskAttributes::hipri()); 2193 task(world.rank(), &implT:: template do_mul<R,L>, key, right_node.coeff(), arg); // Case 3. 2194 } 2195 } 2196 else if (!coeffs.probe(key)) { 2197 // Interior node 2198 coeffs.replace(key,nodeT(tensorT(),true)); 2199 } 2200 2201 } 2202 if (fence) world.gop.fence(); 2203 } 2204 2205 2206 2207 template <typename L, typename R> 2208 Void do_mul_sparse2(const keyT& key, 2209 const std::pair< keyT,Tensor<L> >& larg, 2210 const std::pair< keyT,Tensor<R> >& rarg, 2211 const FunctionImpl<R,NDIM>* right) { 2212 PROFILE_MEMBER_FUNC(FunctionImpl); 2213 2214 if (rarg.second.size > 0) { 2215 if (larg.first == key) { 2216 //madness::print("L*R",key,larg.first,larg.second.size,rarg.first,rarg.second.size); 2217 do_mul(key, larg.second, rarg); 2218 } 2219 else { 2220 //madness::print("R*L",key,larg.first,larg.second.size,rarg.first,rarg.second.size); 2221 do_mul(key, rarg.second, larg); 2222 } 2223 } 2224 else { 2225 coeffs.replace(key, nodeT(tensorT(), true)); // Insert interior node 2226 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2227 typedef std::pair< keyT,Tensor<R> > rpairT; 2228 Future<rpairT> rarg; 2229 right->task(coeffs.owner(kit.key()), &FunctionImpl<R,NDIM>::sock_it_to_me, 2230 kit.key(), rarg.remote_ref(world), TaskAttributes::hipri()); 2231 2232 2233 task(world.rank(), &implT:: template do_mul_sparse2<L,R>, 2234 kit.key(),larg, rarg, right); 2235 } 2236 } 2237 return None; 2238 } 2239 2240 template <typename L, typename R> 2241 Void do_mul_sparse(const Tensor<L>& left_coeff, const FunctionImpl<R,NDIM>* right, double tol, 2242 const keyT& key, double right_norm) { 2243 PROFILE_MEMBER_FUNC(FunctionImpl); 2244 if (left_coeff.normf()*right_norm > truncate_tol(tol,key)) { 2245 typedef std::pair< keyT,Tensor<R> > rpairT; 2246 typedef std::pair< keyT,Tensor<L> > lpairT; 2247 Future<rpairT> rarg; 2248 right->task(coeffs.owner(key), &FunctionImpl<R,NDIM>::sock_it_to_me, 2249 key, rarg.remote_ref(world), TaskAttributes::hipri()); 2250 task(world.rank(), &implT:: template do_mul_sparse2<L,R>, 2251 key ,lpairT(key,left_coeff), rarg, right); 2252 } 2253 else { 2254 coeffs.replace(key, nodeT(tensorT(cdata.vk), false)); // Result is zero 2255 } 2256 return None; 2257 } 2258 2259 template <typename L, typename R> 2260 void mul_sparse(const FunctionImpl<L,NDIM>& left, const FunctionImpl<R,NDIM>& right, double tol, bool fence) { 2261 // Loop thru leaf nodes in left 2262 for (typename FunctionImpl<L,NDIM>::dcT::const_iterator it=left.coeffs.begin(); 2263 it != left.coeffs.end(); 2264 ++it) { 2265 const keyT& key = it->first; 2266 const FunctionNode<L,NDIM>& left_node = it->second; 2267 2268 if (left_node.is_leaf()) { 2269 Future<double> rarg = right.task(right.coeffs.owner(key), &implT::get_norm_tree_recursive, key, TaskAttributes::hipri()); 2270 task(world.rank(), &implT:: template do_mul_sparse<L,R>, left_node.coeff(), &right, tol, key, rarg); 2271 } 2272 else { 2273 coeffs.replace(key, nodeT(tensorT(), true)); // Insert interior node 2274 } 2275 } 2276 if (fence) world.gop.fence(); 2277 } 2278 2279 2280 2281 2282 2283 // reorder subspace in order of decreasing residual norm 2284 for (int i=0; i<=m; i++) { 2285 for (int j=0; j<i; j++) { 2286 if (rnorms[i] > rnorms[j]) { 2287 swap(rvec[i],rvec[j]); 2288 swap(fvec[i],fvec[j]); 2289 swap(rnorms[i],rnorms[j]); 2290 2291 tensorT tmp; 2292 tmp = copy(Q(i,_)); Q(i,_) = Q(j,_); Q(j,_) = tmp; 2293 tmp = copy(Q(_,i)); Q(_,i) = Q(_,j); Q(_,j) = tmp; 2294 } 2295 } 2296 } 2297 print("reordered rnorms", rnorms); 2298 2299 2300 2301 2302 2303 2304 2305// tensorT result(cdata.vk); 2306// if (axis == 2) { 2307// for (int p=0; p<k; p++) { 2308// for (int q=0; q<k; q++) { 2309// for (int r=0; r<k; r++) { 2310// for (int s=0; s<k; s++) { 2311// result(p,q,r) += R(r,s)*c(p,q,s); 2312// } 2313// } 2314// } 2315// } 2316// } 2317// else if (axis == 1) { 2318// for (int p=0; p<k; p++) { 2319// for (int q=0; q<k; q++) { 2320// for (int r=0; r<k; r++) { 2321// for (int s=0; s<k; s++) { 2322// result(p,r,q) += R(r,s)*c(p,s,q); 2323// } 2324// } 2325// } 2326// } 2327// } 2328// else if (axis == 0) { 2329// for (int p=0; p<k; p++) { 2330// for (int q=0; q<k; q++) { 2331// for (int r=0; r<k; r++) { 2332// for (int s=0; s<k; s++) { 2333// result(r,p,q) += R(r,s)*c(s,p,q); 2334// } 2335// } 2336// } 2337// } 2338// } 2339 2340// if ((result - Result).normf() > 1e-10) { 2341// print("BAD", key, axis, (result - Result).normf()); 2342// } 2343 2344 2345 2346 2347 2348 /// 1D Gaussian convolution summed over periodic translations 2349 2350 /// r_periodic(n,l) = sum(R=-maxR,+maxR)[r_nonperiodic(n,l+R*2^n)] 2351 template <typename Q> 2352 class PeriodicGaussianConvolution1D : public Convolution1D<Q> { 2353 public: 2354 2355 const int k; 2356 const int maxR; 2357 GaussianConvolution1D<Q> g; 2358 2359 PeriodicGaussianConvolution1D(int k, int maxR, Q coeff, double expnt, double sign=1.0) 2360 : Convolution1D<Q>(k,k,1.0), k(k), maxR(maxR), g(k,coeff,expnt,sign) {} 2361 2362 virtual ~PeriodicGaussianConvolution1D() {} 2363 2364 Level natural_level() const { 2365 return g.natural_level(); 2366 } 2367 2368 Tensor<Q> rnlp(Level n, Translation lx) const { 2369 Translation twon = Translation(1)<<n; 2370 Tensor<Q> r(2*k); 2371 for (int R=-maxR; R<=maxR; R++) { 2372 r.gaxpy(1.0, g.get_rnlp(n,R*twon+lx), 1.0); 2373 } 2374 return r; 2375 } 2376 2377 bool issmall(Level n, Translation lx) const { 2378 Translation twon = Translation(1)<<n; 2379 for (int R=-maxR; R<=maxR; R++) { 2380 if (!g.issmall(n, R*twon+lx)) return false; 2381 } 2382 return true; 2383 } 2384 }; 2385/* 2386 This file is part of MADNESS. 2387 2388 Copyright (C) <2007> <Oak Ridge National Laboratory> 2389 2390 This program is free software; you can redistribute it and/or modify 2391 it under the terms of the GNU General Public License as published by 2392 the Free Software Foundation; either version 2 of the License, or 2393 (at your option) any later version. 2394 2395 This program is distributed in the hope that it will be useful, 2396 but WITHOUT ANY WARRANTY; without even the implied warranty of 2397 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 2398 GNU General Public License for more details. 2399 2400 You should have received a copy of the GNU General Public License 2401 along with this program; if not, write to the Free Software 2402 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 2403 2404 For more information please contact: 2405 2406 Robert J. Harrison 2407 Oak Ridge National Laboratory 2408 One Bethel Valley Road 2409 P.O. Box 2008, MS-6367 2410 2411 email: harrisonrj@ornl.gov 2412 tel: 865-241-3937 2413 fax: 865-572-0680 2414 2415 2416 $Id$ 2417*/ 2418 2419 2420//#define WORLD_INSTANTIATE_STATIC_TEMPLATES 2421#include <madness/mra/mra.h> 2422#include <madness/world/worldhashmap.h> 2423 2424 2425extern "C" double round(double x); 2426 2427 2428/// \file mra.cc 2429/// \file Declaration and initialization of static data, some implementation, some instantiation 2430 2431namespace madness { 2432 2433 // Definition and initialization of FunctionDefaults static members 2434 // It cannot be an instance of FunctionFactory since we want to 2435 // set the defaults independent of the data type. 2436 2437 template <typename T, int NDIM> 2438 void FunctionCommonData<T,NDIM>::_make_dc_periodic() { 2439 // See ABGV for details 2440 r0 = Tensor<double>(k,k); 2441 rp = Tensor<double>(k,k); 2442 rm = Tensor<double>(k,k); 2443 2444 double iphase = 1.0; 2445 for (int i=0; i<k; i++) { 2446 double jphase = 1.0; 2447 for (int j=0; j<k; j++) { 2448 double gammaij = sqrt(double((2*i+1)*(2*j+1))); 2449 double Kij; 2450 if (((i-j)>0) && (((i-j)%2)==1)) 2451 Kij = 2.0; 2452 else 2453 Kij = 0.0; 2454 2455 r0(i,j) = 0.5*(1.0 - iphase*jphase - 2.0*Kij)*gammaij; 2456 rm(i,j) = 0.5*jphase*gammaij; 2457 rp(i,j) =-0.5*iphase*gammaij; 2458 jphase = -jphase; 2459 } 2460 iphase = -iphase; 2461 } 2462 2463 // Make the rank-1 forms of rm and rp 2464 rm_left = Tensor<double>(k); 2465 rm_right = Tensor<double>(k); 2466 rp_left = Tensor<double>(k); 2467 rp_right = Tensor<double>(k); 2468 2469 iphase = 1.0; 2470 for (int i=0; i<k; i++) { 2471 double gamma = sqrt(0.5*(2*i+1)); 2472 rm_left(i) = rp_right(i) = gamma; 2473 rm_right(i) = rp_left(i) = gamma*iphase; 2474 iphase *= -1.0; 2475 } 2476 rp_left.scale(-1.0); 2477 2478// Tensor<double> rm_test = outer(rm_left,rm_right); 2479// Tensor<double> rp_test = outer(rp_left,rp_right); 2480 } 2481 2482 template <typename T, int NDIM> 2483 void FunctionCommonData<T,NDIM>::_init_twoscale() { 2484 if (! two_scale_hg(k, &hg)) throw "failed to get twoscale coefficients"; 2485 hgT = transpose(hg); 2486 hgsonly = copy(hg(Slice(0,k-1),_)); 2487 } 2488 2489 template <typename T, int NDIM> 2490 void FunctionCommonData<T,NDIM>::_init_quadrature 2491 (int k, int npt, Tensor<double>& quad_x, Tensor<double>& quad_w, 2492 Tensor<double>& quad_phi, Tensor<double>& quad_phiw, Tensor<double>& quad_phit) { 2493 quad_x = Tensor<double>(npt); 2494 quad_w = Tensor<double>(npt); 2495 quad_phi = Tensor<double>(npt,k); 2496 quad_phiw = Tensor<double>(npt,k); 2497 2498 gauss_legendre(npt,0.0,1.0,quad_x.ptr(),quad_w.ptr()); 2499 for (int mu=0; mu<npt; mu++) { 2500 double phi[200]; 2501 legendre_scaling_functions(quad_x(mu),k,phi); 2502 for (int j=0; j<k; j++) { 2503 quad_phi(mu,j) = phi[j]; 2504 quad_phiw(mu,j) = quad_w(mu)*phi[j]; 2505 } 2506 } 2507 quad_phit = transpose(quad_phi); 2508 } 2509 2510 2511 template <typename T, int NDIM> 2512 void FunctionImpl<T,NDIM>::verify_tree() const { 2513 PROFILE_MEMBER_FUNC(FunctionImpl); 2514 world.gop.fence(); // Make sure nothing is going on 2515 2516 // Verify consistency of compression status, existence and size of coefficients, 2517 // and has_children() flag. 2518 for(typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) { 2519 const keyT& key = it->first; 2520 const nodeT& node = it->second; 2521 bool bad; 2522 2523 if (is_compressed()) { 2524 if (node.has_children()) { 2525 bad = node.coeff().dim[0] != 2*cdata.k; 2526 } 2527 else { 2528 bad = node.coeff().size != 0; 2529 } 2530 } 2531 else { 2532 if (node.has_children()) { 2533 bad = node.coeff().size != 0; 2534 } 2535 else { 2536 bad = node.coeff().dim[0] != cdata.k; 2537 } 2538 } 2539 2540 if (bad) { 2541 print(world.rank(), "FunctionImpl: verify: INCONSISTENT TREE NODE, key =", key, ", node =", node, 2542 ", dim[0] =",node.coeff().dim[0],", compressed =",is_compressed()); 2543 std::cout.flush(); 2544 MADNESS_EXCEPTION("FunctionImpl: verify: INCONSISTENT TREE NODE", 0); 2545 } 2546 } 2547 2548 // Ensure that parents and children exist appropriately 2549 for(typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) { 2550 const keyT& key = it->first; 2551 const nodeT& node = it->second; 2552 2553 if (key.level() > 0) { 2554 const keyT parent = key.parent(); 2555 typename dcT::const_iterator pit = coeffs.find(parent).get(); 2556 if (pit == coeffs.end()) { 2557 print(world.rank(), "FunctionImpl: verify: MISSING PARENT",key,parent); 2558 std::cout.flush(); 2559 MADNESS_EXCEPTION("FunctionImpl: verify: MISSING PARENT", 0); 2560 } 2561 const nodeT& pnode = pit->second; 2562 if (!pnode.has_children()) { 2563 print(world.rank(), "FunctionImpl: verify: PARENT THINKS IT HAS NO CHILDREN",key,parent); 2564 std::cout.flush(); 2565 MADNESS_EXCEPTION("FunctionImpl: verify: PARENT THINKS IT HAS NO CHILDREN", 0); 2566 } 2567 } 2568 2569 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2570 typename dcT::const_iterator cit = coeffs.find(kit.key()).get(); 2571 if (cit == coeffs.end()) { 2572 if (node.has_children()) { 2573 print(world.rank(), "FunctionImpl: verify: MISSING CHILD",key,kit.key()); 2574 std::cout.flush(); 2575 MADNESS_EXCEPTION("FunctionImpl: verify: MISSING CHILD", 0); 2576 } 2577 } 2578 else { 2579 if (! node.has_children()) { 2580 print(world.rank(), "FunctionImpl: verify: UNEXPECTED CHILD",key,kit.key()); 2581 std::cout.flush(); 2582 MADNESS_EXCEPTION("FunctionImpl: verify: UNEXPECTED CHILD", 0); 2583 } 2584 } 2585 } 2586 } 2587 2588 world.gop.fence(); 2589 } 2590 2591 template <typename T, int NDIM> 2592 T FunctionImpl<T,NDIM>::eval_cube(Level n, coordT x, const tensorT c) const { 2593 PROFILE_MEMBER_FUNC(FunctionImpl); 2594 const int k = cdata.k; 2595 double px[NDIM][k]; 2596 T sum = T(0.0); 2597 2598 for (int i=0; i<NDIM; i++) legendre_scaling_functions(x[i],k,px[i]); 2599 2600 if (NDIM == 1) { 2601 for (int p=0; p<k; p++) 2602 sum += c(p)*px[0][p]; 2603 } 2604 else if (NDIM == 2) { 2605 for (int p=0; p<k; p++) 2606 for (int q=0; q<k; q++) 2607 sum += c(p,q)*px[0][p]*px[1][q]; 2608 } 2609 else if (NDIM == 3) { 2610 for (int p=0; p<k; p++) 2611 for (int q=0; q<k; q++) 2612 for (int r=0; r<k; r++) 2613 sum += c(p,q,r)*px[0][p]*px[1][q]*px[2][r]; 2614 } 2615 else if (NDIM == 4) { 2616 for (int p=0; p<k; p++) 2617 for (int q=0; q<k; q++) 2618 for (int r=0; r<k; r++) 2619 for (int s=0; s<k; s++) 2620 sum += c(p,q,r,s)*px[0][p]*px[1][q]*px[2][r]*px[3][s]; 2621 } 2622 else if (NDIM == 5) { 2623 for (int p=0; p<k; p++) 2624 for (int q=0; q<k; q++) 2625 for (int r=0; r<k; r++) 2626 for (int s=0; s<k; s++) 2627 for (int t=0; t<k; t++) 2628 sum += c(p,q,r,s,t)*px[0][p]*px[1][q]*px[2][r]*px[3][s]*px[4][t]; 2629 } 2630 else if (NDIM == 6) { 2631 for (int p=0; p<k; p++) 2632 for (int q=0; q<k; q++) 2633 for (int r=0; r<k; r++) 2634 for (int s=0; s<k; s++) 2635 for (int t=0; t<k; t++) 2636 for (int u=0; u<k; u++) 2637 sum += c(p,q,r,s,t,u)*px[0][p]*px[1][q]*px[2][r]*px[3][s]*px[4][t]*px[5][u]; 2638 } 2639 else { 2640 MADNESS_EXCEPTION("FunctionImpl:eval_cube:NDIM?",NDIM); 2641 } 2642 return sum*pow(2.0,0.5*NDIM*n)/sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 2643 } 2644 2645 template <typename T, int NDIM> 2646 Void FunctionImpl<T,NDIM>::reconstruct_op(const keyT& key, const tensorT& s) { 2647 PROFILE_MEMBER_FUNC(FunctionImpl); 2648 // Note that after application of an integral operator not all 2649 // siblings may be present so it is necessary to check existence 2650 // and if absent insert an empty leaf node. 2651 // 2652 // If summing the result of an integral operator (i.e., from 2653 // non-standard form) there will be significant scaling function 2654 // coefficients at all levels and possibly difference coefficients 2655 // in leaves, hence the tree may refine as a result. 2656 typename dcT::iterator it = coeffs.find(key).get(); 2657 if (it == coeffs.end()) { 2658 coeffs.replace(key,nodeT(tensorT(),false)); 2659 it = coeffs.find(key).get(); 2660 } 2661 nodeT& node = it->second; 2662 2663 // The integral operator will correctly connect interior nodes 2664 // to children but may leave interior nodes without coefficients 2665 // ... but they still need to sum down so just give them zeros 2666 if (node.has_children() && !node.has_coeff()) { 2667 node.set_coeff(tensorT(cdata.v2k)); 2668 } 2669 2670 if (node.has_children() || node.has_coeff()) { // Must allow for inconsistent state from transform, etc. 2671 tensorT d = node.coeff(); 2672 if (d.size == 0) d = tensorT(cdata.v2k); 2673 if (key.level() > 0) d(cdata.s0) += s; // -- note accumulate for NS summation 2674 d = unfilter(d); 2675 node.clear_coeff(); 2676 node.set_has_children(true); 2677 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2678 const keyT& child = kit.key(); 2679 tensorT ss = copy(d(child_patch(child))); 2680 PROFILE_BLOCK(recon_send); 2681 task(coeffs.owner(child), &implT::reconstruct_op, child, ss); 2682 } 2683 } 2684 else { 2685 if (key.level()) node.set_coeff(copy(s)); 2686 else node.set_coeff(s); 2687 } 2688 return None; 2689 } 2690 2691 2692 template <typename T, int NDIM, typename FF> 2693 //~ void FunctionImpl<T,NDIM>::fcube(const keyT& key, const FunctionFunctorInterface<T,NDIM>& f, const Tensor<double>& qx, tensorT& fval) const { 2694 void FunctionImpl<T,NDIM>::fcube(const keyT& key, const FF& f, const Tensor<double>& qx, tensorT& fval) const { 2695 PROFILE_MEMBER_FUNC(FunctionImpl); 2696 const Vector<Translation,NDIM>& l = key.translation(); 2697 const Level n = key.level(); 2698 const double h = std::pow(0.5,double(n)); 2699 coordT c; // will hold the point in user coordinates 2700 const int npt = qx.dim[0]; 2701 2702 const Tensor<double>& cell_width = FunctionDefaults<NDIM>::get_cell_width(); 2703 const Tensor<double>& cell = FunctionDefaults<NDIM>::get_cell(); 2704 2705 if (NDIM == 1) { 2706 for (int i=0; i<npt; i++) { 2707 c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x 2708 fval(i) = f(c); 2709 } 2710 } 2711 else if (NDIM == 2) { 2712 for (int i=0; i<npt; i++) { 2713 c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x 2714 for (int j=0; j<npt; j++) { 2715 c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y 2716 fval(i,j) = f(c); 2717 } 2718 } 2719 } 2720 else if (NDIM == 3) { 2721 for (int i=0; i<npt; i++) { 2722 c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x 2723 for (int j=0; j<npt; j++) { 2724 c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y 2725 for (int k=0; k<npt; k++) { 2726 c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z 2727 fval(i,j,k) = f(c); 2728 } 2729 } 2730 } 2731 } 2732 else if (NDIM == 4) { 2733 for (int i=0; i<npt; i++) { 2734 c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x 2735 for (int j=0; j<npt; j++) { 2736 c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y 2737 for (int k=0; k<npt; k++) { 2738 c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z 2739 for (int m=0; m<npt; m++) { 2740 c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx 2741 fval(i,j,k,m) = f(c); 2742 } 2743 } 2744 } 2745 } 2746 } 2747 else if (NDIM == 5) { 2748 for (int i=0; i<npt; i++) { 2749 c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x 2750 for (int j=0; j<npt; j++) { 2751 c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y 2752 for (int k=0; k<npt; k++) { 2753 c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z 2754 for (int m=0; m<npt; m++) { 2755 c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx 2756 for (int n=0; n<npt; n++) { 2757 c[4] = cell(4,0) + h*cell_width[4]*(l[4] + qx(n)); // yy 2758 fval(i,j,k,m,n) = f(c); 2759 } 2760 } 2761 } 2762 } 2763 } 2764 } 2765 else if (NDIM == 6) { 2766 for (int i=0; i<npt; i++) { 2767 c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x 2768 for (int j=0; j<npt; j++) { 2769 c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y 2770 for (int k=0; k<npt; k++) { 2771 c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z 2772 for (int m=0; m<npt; m++) { 2773 c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx 2774 for (int n=0; n<npt; n++) { 2775 c[4] = cell(4,0) + h*cell_width[4]*(l[4] + qx(n)); // yy 2776 for (int p=0; p<npt; p++) { 2777 c[5] = cell(5,0) + h*cell_width[5]*(l[5] + qx(p)); // zz 2778 fval(i,j,k,m,n,p) = f(c); 2779 } 2780 } 2781 } 2782 } 2783 } 2784 } 2785 } 2786 else { 2787 MADNESS_EXCEPTION("FunctionImpl: fcube: confused about NDIM?",NDIM); 2788 } 2789 } 2790 2791 template <typename T, int NDIM> 2792 Void FunctionImpl<T,NDIM>::project_refine_op(const keyT& key, bool do_refine) { 2793 PROFILE_MEMBER_FUNC(FunctionImpl); 2794 if (do_refine) { 2795 // Make in r child scaling function coeffs at level n+1 2796 tensorT r(cdata.v2k); 2797 for (KeyChildIterator<NDIM> it(key); it; ++it) { 2798 const keyT& child = it.key(); 2799 r(child_patch(child)) = project(child); 2800 } 2801 // Filter then test difference coeffs at level n 2802 tensorT d = filter(r); 2803 tensorT s0; 2804 if (truncate_on_project) s0 = copy(d(cdata.s0)); 2805 d(cdata.s0) = T(0); 2806 2807 if (d.normf()<truncate_tol(thresh,key.level()) || key.level()>=max_refine_level) { 2808 if (key.level()>=max_refine_level) print("MAX REFINE LEVEL",key); 2809 if (truncate_on_project) { 2810 coeffs.replace(key,nodeT(s0,false)); 2811 } 2812 else { 2813 coeffs.replace(key,nodeT(tensorT(),true)); // Insert empty node for parent 2814 for (KeyChildIterator<NDIM> it(key); it; ++it) { 2815 const keyT& child = it.key(); 2816 coeffs.replace(child,nodeT(copy(r(child_patch(child))),false)); 2817 } 2818 } 2819 } 2820 else { 2821 coeffs.replace(key,nodeT(tensorT(),true)); // Insert empty node for parent 2822 for (KeyChildIterator<NDIM> it(key); it; ++it) { 2823 const keyT& child = it.key(); 2824 ProcessID p; 2825 if (FunctionDefaults<NDIM>::get_project_randomize()) { 2826 p = world.random_proc(); 2827 } 2828 else { 2829 p = coeffs.owner(child); 2830 } 2831 PROFILE_BLOCK(proj_refine_send); 2832 task(p, &implT::project_refine_op, child, do_refine); // ugh 2833 } 2834 } 2835 } 2836 else { 2837 coeffs.replace(key,nodeT(project(key),false)); 2838 } 2839 return None; 2840 } 2841 2842 template <typename T, int NDIM> 2843 void FunctionImpl<T,NDIM>::add_scalar_inplace(T t, bool fence) { 2844 std::vector<long> v0(NDIM,0L); 2845 if (is_compressed()) { 2846 if (world.rank() == coeffs.owner(cdata.key0)) { 2847 typename dcT::iterator it = coeffs.find(cdata.key0).get(); 2848 MADNESS_ASSERT(it != coeffs.end()); 2849 nodeT& node = it->second; 2850 MADNESS_ASSERT(node.has_coeff()); 2851 node.coeff()(v0) += t*sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 2852 } 2853 } 2854 else { 2855 for(typename dcT::iterator it=coeffs.begin(); it!=coeffs.end(); ++it) { 2856 Level n = it->first.level(); 2857 nodeT& node = it->second; 2858 if (node.has_coeff()) { 2859 node.coeff()(v0) += t*sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*n))); 2860 } 2861 } 2862 } 2863 if (fence) world.gop.fence(); 2864 } 2865 2866 template <typename T, int NDIM> 2867 void FunctionImpl<T,NDIM>::insert_zero_down_to_initial_level(const keyT& key) { 2868 PROFILE_MEMBER_FUNC(FunctionImpl); 2869 if (compressed) initial_level = std::max(initial_level,1); // Otherwise zero function is confused 2870 if (coeffs.is_local(key)) { 2871 if (compressed) { 2872 if (key.level() == initial_level) { 2873 coeffs.replace(key, nodeT(tensorT(), false)); 2874 } 2875 else { 2876 coeffs.replace(key, nodeT(tensorT(cdata.v2k), true)); 2877 } 2878 } 2879 else { 2880 if (key.level()<initial_level) { 2881 coeffs.replace(key, nodeT(tensorT(), true)); 2882 } 2883 else { 2884 coeffs.replace(key, nodeT(tensorT(cdata.vk), false)); 2885 } 2886 } 2887 } 2888 if (key.level() < initial_level) { 2889 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2890 insert_zero_down_to_initial_level(kit.key()); 2891 } 2892 } 2893 2894 } 2895 2896 2897 template <typename T, int NDIM> 2898 Future<bool> FunctionImpl<T,NDIM>::truncate_spawn(const keyT& key, double tol) { 2899 PROFILE_MEMBER_FUNC(FunctionImpl); 2900 typename dcT::iterator it = coeffs.find(key).get(); 2901 if (it == coeffs.end()) { 2902 // In a standard tree all children would exist but some ops (transform) 2903 // can leave the tree in a messy state. Just make the missing node as an 2904 // empty leaf. 2905 coeffs.replace(key,nodeT()); 2906 it = coeffs.find(key).get(); 2907 } 2908 nodeT& node = it->second; 2909 if (node.has_children()) { 2910 std::vector< Future<bool> > v = future_vector_factory<bool>(1<<NDIM); 2911 int i=0; 2912 for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) { 2913 v[i] = task(coeffs.owner(kit.key()), &implT::truncate_spawn, kit.key(), tol, TaskAttributes::generator()); 2914 } 2915 return task(world.rank(),&implT::truncate_op, key, tol, v); 2916 } 2917 else { 2918 // In compressed form leaves should not have coeffs ... however the 2919 // transform op could leave the tree with leaves that do have coeffs 2920 // in which case we want something sensible to happen 2921 //MADNESS_ASSERT(!node.has_coeff()); 2922 if (node.has_coeff() && key.level()>1) { 2923 double dnorm = node.coeff().normf(); 2924 if (dnorm < truncate_tol(tol,key)) { 2925 node.clear_coeff(); 2926 } 2927 } 2928 return Future<bool>(node.has_coeff()); 2929 } 2930 } 2931 2932 2933 template <typename T, int NDIM> 2934 bool FunctionImpl<T,NDIM>::truncate_op(const keyT& key, double tol, const std::vector< Future<bool> >& v) { 2935 PROFILE_MEMBER_FUNC(FunctionImpl); 2936 // If any child has coefficients, a parent cannot truncate 2937 for (int i=0; i<(1<<NDIM); i++) if (v[i].get()) return true; 2938 nodeT& node = coeffs.find(key).get()->second; 2939 2940 // Interior nodes should always have coeffs but transform might 2941 // leave empty interior nodes ... hence just force no coeffs to 2942 // be zero coeff unless it is a leaf. 2943 if (node.has_children() && !node.has_coeff()) node.set_coeff(tensorT(cdata.v2k)); 2944 2945 if (key.level() > 1) { // >1 rather >0 otherwise reconstruct might get confused 2946 double dnorm = node.coeff().normf(); 2947 if (dnorm < truncate_tol(tol,key)) { 2948 node.clear_coeff(); 2949 if (node.has_children()) { 2950 node.set_has_children(false); 2951 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2952 coeffs.erase(kit.key()); 2953 } 2954 } 2955 } 2956 } 2957 return node.has_coeff(); 2958 } 2959 2960 2961 template <typename T, int NDIM> 2962 void FunctionImpl<T,NDIM>::print_tree(Level maxlevel) const { 2963 if (world.rank() == 0) do_print_tree(cdata.key0, maxlevel); 2964 world.gop.fence(); 2965 if (world.rank() == 0) std::cout.flush(); 2966 world.gop.fence(); 2967 } 2968 2969 2970 template <typename T, int NDIM> 2971 void FunctionImpl<T,NDIM>::do_print_tree(const keyT& key, Level maxlevel) const { 2972 typename dcT::const_iterator it = coeffs.find(key).get(); 2973 if (it == coeffs.end()) { 2974 MADNESS_EXCEPTION("FunctionImpl: do_print_tree: null node pointer",0); 2975 } 2976 const nodeT& node = it->second; 2977 for (int i=0; i<key.level(); i++) std::cout << " "; 2978 std::cout << key << " " << node << " --> " << coeffs.owner(key) << "\n"; 2979 for (int i=0; i<key.level(); i++) std::cout << " "; 2980 std::cout << node.coeff() << "\n"; 2981 if (key.level() < maxlevel && node.has_children()) { 2982 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 2983 do_print_tree(kit.key(),maxlevel); 2984 } 2985 } 2986 } 2987 2988 template <typename T, int NDIM> 2989 Tensor<T> FunctionImpl<T,NDIM>::project(const keyT& key) const { 2990 PROFILE_MEMBER_FUNC(FunctionImpl); 2991 MADNESS_ASSERT(cdata.npt == cdata.k); // only necessary due to use of fast transform 2992 tensorT fval(cdata.vq,false); // this will be the returned result 2993 tensorT work(cdata.vk,false); // initially evaluate the function in here 2994 tensorT workq(cdata.vq,false); // initially evaluate the function in here 2995 2996 if (functor) { 2997 fcube(key,*functor,cdata.quad_x,work); 2998 } 2999 else { 3000 MADNESS_EXCEPTION("FunctionImpl: project: confusion about function?",0); 3001 } 3002 3003 work.scale(sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*key.level())))); 3004 //return transform(work,cdata.quad_phiw); 3005 return fast_transform(work,cdata.quad_phiw,fval,workq); 3006 } 3007 3008 template <typename T, int NDIM> 3009 Future<double> FunctionImpl<T,NDIM>::get_norm_tree_recursive(const keyT& key) const { 3010 if (coeffs.probe(key)) { 3011 return Future<double>(coeffs.find(key).get()->second.get_norm_tree()); 3012 } 3013 MADNESS_ASSERT(key.level()); 3014 keyT parent = key.parent(); 3015 return task(coeffs.owner(parent), &implT::get_norm_tree_recursive, parent, TaskAttributes::hipri()); 3016 } 3017 3018 3019 template <typename T, int NDIM> 3020 Void FunctionImpl<T,NDIM>::sock_it_to_me(const keyT& key, 3021 const RemoteReference< FutureImpl< std::pair<keyT,tensorT> > >& ref) const { 3022 PROFILE_MEMBER_FUNC(FunctionImpl); 3023 if (coeffs.probe(key)) { 3024 const nodeT& node = coeffs.find(key).get()->second; 3025 Future< std::pair<keyT,tensorT> > result(ref); 3026 if (node.has_coeff()) { 3027 //madness::print("sock found it with coeff",key); 3028 result.set(std::pair<keyT,tensorT>(key,node.coeff())); 3029 } 3030 else { 3031 //madness::print("sock found it without coeff",key); 3032 result.set(std::pair<keyT,tensorT>(key,tensorT())); 3033 } 3034 } 3035 else { 3036 keyT parent = key.parent(); 3037 //madness::print("sock forwarding to parent",key,parent); 3038 PROFILE_BLOCK(sitome_send); 3039 task(coeffs.owner(parent), &FunctionImpl<T,NDIM>::sock_it_to_me, parent, ref, TaskAttributes::hipri()); 3040 } 3041 return None; 3042 } 3043 3044 template <typename T, int NDIM> 3045 Void FunctionImpl<T,NDIM>::eval(const Vector<double,NDIM>& xin, 3046 const keyT& keyin, 3047 const typename Future<T>::remote_refT& ref) { 3048 3049 PROFILE_MEMBER_FUNC(FunctionImpl); 3050 // This is ugly. We must figure out a clean way to use 3051 // owner computes rule from the container. 3052 Vector<double,NDIM> x = xin; 3053 keyT key = keyin; 3054 Vector<Translation,NDIM> l = key.translation(); 3055 ProcessID me = world.rank(); 3056 while (1) { 3057 ProcessID owner = coeffs.owner(key); 3058 if (owner != me) { 3059 PROFILE_BLOCK(eval_send); 3060 task(owner, &implT::eval, x, key, ref, TaskAttributes::hipri()); 3061 return None; 3062 } 3063 else { 3064 typename dcT::futureT fut = coeffs.find(key); 3065 typename dcT::iterator it = fut.get(); 3066 nodeT& node = it->second; 3067 if (node.has_coeff()) { 3068 Future<T>(ref).set(eval_cube(key.level(), x, node.coeff())); 3069 return None; 3070 } 3071 else { 3072 for (int i=0; i<NDIM; i++) { 3073 double xi = x[i]*2.0; 3074 int li = int(xi); 3075 if (li == 2) li = 1; 3076 x[i] = xi - li; 3077 l[i] = 2*l[i] + li; 3078 } 3079 key = keyT(key.level()+1,l); 3080 } 3081 } 3082 } 3083 //MADNESS_EXCEPTION("should not be here",0); 3084 } 3085 3086 template <typename T, int NDIM> 3087 void FunctionImpl<T,NDIM>::tnorm(const tensorT& t, double* lo, double* hi) const { 3088 PROFILE_MEMBER_FUNC(FunctionImpl); 3089 // Chosen approach looks stupid but it is more accurate 3090 // than the simple approach of summing everything and 3091 // subtracting off the low-order stuff to get the high 3092 // order (assuming the high-order stuff is small relative 3093 // to the low-order) 3094 tensorT work = copy(t); 3095 tensorT tlo = work(cdata.sh); 3096 *lo = tlo.normf(); 3097 tlo.fill(0.0); 3098 *hi = work.normf(); 3099 } 3100 3101 namespace detail { 3102 template <typename A, typename B> 3103 struct noop { 3104 void operator()(const A& a, const B& b) const {}; 3105 3106 template <typename Archive> void serialize(Archive& ar) {} 3107 }; 3108 3109 template <typename T, int NDIM> 3110 struct scaleinplace { 3111 T q; 3112 scaleinplace() {} 3113 scaleinplace(T q) : q(q) {} 3114 void operator()(const Key<NDIM>& key, Tensor<T>& t) const {t.scale(q);} 3115 template <typename Archive> void serialize(Archive& ar) {ar & q;} 3116 }; 3117 3118 template <typename T, int NDIM> 3119 struct squareinplace { 3120 void operator()(const Key<NDIM>& key, Tensor<T>& t) const {t.emul(t);} 3121 template <typename Archive> void serialize(Archive& ar) {} 3122 }; 3123 3124 template <typename T, int NDIM> 3125 struct absinplace { 3126 void operator()(const Key<NDIM>& key, Tensor<T>& t) const {abs(t);} 3127 template <typename Archive> void serialize(Archive& ar) {} 3128 }; 3129 } 3130 3131 template <typename T, int NDIM> 3132 void FunctionImpl<T,NDIM>::scale_inplace(const T q, bool fence) 3133 { 3134 unary_op_coeff_inplace(detail::scaleinplace<T,NDIM>(q), fence); 3135 } 3136 3137 template <typename T, int NDIM> 3138 void FunctionImpl<T,NDIM>::square_inplace(bool fence) { 3139 //unary_op_value_inplace(&implT::autorefine_square_test, detail::squareinplace<T,NDIM>(), fence); 3140 unary_op_value_inplace(detail::squareinplace<T,NDIM>(), fence); 3141 } 3142 3143 template <typename T, int NDIM> 3144 void FunctionImpl<T,NDIM>::phi_for_mul(Level np, Translation lp, Level nc, Translation lc, Tensor<double>& phi) const { 3145 PROFILE_MEMBER_FUNC(FunctionImpl); 3146 double p[200]; 3147 double scale = pow(2.0,double(np-nc)); 3148 for (int mu=0; mu<cdata.npt; mu++) { 3149 double xmu = scale*(cdata.quad_x(mu)+lc) - lp; 3150 MADNESS_ASSERT(xmu>-1e-15 && xmu<(1+1e-15)); 3151 legendre_scaling_functions(xmu,cdata.k,p); 3152 for (int i=0; i<k; i++) phi(i,mu) = p[i]; 3153 } 3154 phi.scale(pow(2.0,0.5*np)); 3155 } 3156 3157 template <typename T, int NDIM> 3158 const Tensor<T> FunctionImpl<T,NDIM>::parent_to_child(const tensorT& s, const keyT& parent, const keyT& child) const { 3159 PROFILE_MEMBER_FUNC(FunctionImpl); 3160 // An invalid parent/child means that they are out of the box 3161 // and it is the responsibility of the caller to worry about that 3162 // ... most likely the coefficients (s) are zero to reflect 3163 // zero B.C. so returning s makes handling this easy. 3164 if (parent == child || parent.is_invalid() || child.is_invalid()) return s; 3165 3166 tensorT result = fcube_for_mul<T>(child, parent, s); 3167 result.scale(sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*child.level())))); 3168 result = transform(result,cdata.quad_phiw); 3169 3170 return result; 3171 } 3172 3173 3174 template <typename T, int NDIM> 3175 T FunctionImpl<T,NDIM>::trace_local() const { 3176 PROFILE_MEMBER_FUNC(FunctionImpl); 3177 std::vector<long> v0(NDIM,0); 3178 T sum = 0.0; 3179 if (compressed) { 3180 if (world.rank() == coeffs.owner(cdata.key0)) { 3181 typename dcT::const_iterator it = coeffs.find(cdata.key0).get(); 3182 if (it != coeffs.end()) { 3183 const nodeT& node = it->second; 3184 if (node.has_coeff()) sum = node.coeff()(v0); 3185 } 3186 } 3187 } 3188 else { 3189 for(typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) { 3190 const keyT& key = it->first; 3191 const nodeT& node = it->second; 3192 if (node.has_coeff()) sum += node.coeff()(v0)*pow(0.5,NDIM*key.level()*0.5); 3193 } 3194 } 3195 return sum*sqrt(FunctionDefaults<NDIM>::get_cell_volume()); 3196 } 3197 3198 3199 template <typename T, int NDIM> 3200 void FunctionImpl<T,NDIM>::diff(const implT& f, int axis, bool fence) { 3201 PROFILE_MEMBER_FUNC(FunctionImpl); 3202 typedef std::pair<keyT,tensorT> argT; 3203 for(typename dcT::const_iterator it=f.coeffs.begin(); it!=f.coeffs.end(); ++it) { 3204 const keyT& key = it->first; 3205 const nodeT& node = it->second; 3206 if (node.has_coeff()) { 3207 Future<argT> left = f.find_neighbor(key,axis,-1); 3208 argT center(key,node.coeff()); 3209 Future<argT> right = f.find_neighbor(key,axis, 1); 3210 task(world.rank(), &implT::do_diff1, &f, axis, key, left, center, right); 3211 } 3212 else { 3213 // Internal empty node can be safely inserted 3214 coeffs.replace(key,nodeT(tensorT(),true)); 3215 } 3216 } 3217 if (fence) world.gop.fence(); 3218 } 3219 3220 static bool enforce_bc(int bc_left, int bc_right, Level n, Translation& l) { 3221 Translation two2n = 1ul << n; 3222 if (l < 0) { 3223 if (bc_left == 0) { 3224 return false; // Zero BC 3225 } 3226 else if (bc_left == 1) { 3227 l += two2n; // Periodic BC 3228 } 3229 else { 3230 MADNESS_EXCEPTION("enforce_bc: confused left BC?",bc_left); 3231 } 3232 } 3233 else if (l >= two2n) { 3234 if (bc_right == 0) { 3235 return false; // Zero BC 3236 } 3237 else if (bc_right == 1) { 3238 l -= two2n; // Periodic BC 3239 } 3240 else { 3241 MADNESS_EXCEPTION("enforce_bc: confused BC right?",bc_left); 3242 } 3243 } 3244 return true; 3245 } 3246 3247 3248 template <typename T, int NDIM> 3249 Key<NDIM> FunctionImpl<T,NDIM>::neighbor(const keyT& key, int axis, int step) const { 3250 Vector<Translation,NDIM> l = key.translation(); 3251 3252 l[axis] += step; 3253 3254 if (!enforce_bc(bc(axis,0), bc(axis,1), key.level(), l[axis])) { 3255 return keyT::invalid(); 3256 } 3257 else { 3258 return keyT(key.level(),l); 3259 } 3260 } 3261 3262 template <typename T, int NDIM> 3263 Key<NDIM> FunctionImpl<T,NDIM>::neighbor(const keyT& key, const Key<NDIM>& disp) const { 3264 Vector<Translation,NDIM> l = key.translation(); 3265 3266 for (int axis=0; axis<NDIM; axis++) { 3267 l[axis] += disp.translation()[axis]; 3268 3269 if (!enforce_bc(bc(axis,0), bc(axis,1), key.level(), l[axis])) { 3270 return keyT::invalid(); 3271 } 3272 } 3273 return keyT(key.level(),l); 3274 } 3275 3276 template <typename T, int NDIM> 3277 Future< std::pair< Key<NDIM>,Tensor<T> > > 3278 FunctionImpl<T,NDIM>::find_neighbor(const Key<NDIM>& key, int axis, int step) const { 3279 PROFILE_MEMBER_FUNC(FunctionImpl); 3280 typedef std::pair< Key<NDIM>,Tensor<T> > argT; 3281 keyT neigh = neighbor(key, axis, step); 3282 if (neigh.is_invalid()) { 3283 return Future<argT>(argT(neigh,tensorT(cdata.vk))); // Zero bc 3284 } 3285 else { 3286 Future<argT> result; 3287 PROFILE_BLOCK(find_neigh_send); 3288 task(coeffs.owner(neigh), &implT::sock_it_to_me, neigh, result.remote_ref(world), TaskAttributes::hipri()); 3289 return result; 3290 } 3291 } 3292 3293 template <typename T, int NDIM> 3294 Void FunctionImpl<T,NDIM>::forward_do_diff1(const implT* f, int axis, const keyT& key, 3295 const std::pair<keyT,tensorT>& left, 3296 const std::pair<keyT,tensorT>& center, 3297 const std::pair<keyT,tensorT>& right) { 3298 PROFILE_MEMBER_FUNC(FunctionImpl); 3299 ProcessID owner = coeffs.owner(key); 3300 if (owner == world.rank()) { 3301 if (left.second.size == 0) { 3302 task(owner, &implT::do_diff1, f, axis, key, f->find_neighbor(key,axis,-1), center, right, TaskAttributes::hipri()); 3303 } 3304 else if (right.second.size == 0) { 3305 task(owner, &implT::do_diff1, f, axis, key, left, center, f->find_neighbor(key,axis,1), TaskAttributes::hipri()); 3306 } 3307 else { 3308 task(owner, &implT::do_diff2, f, axis, key, left, center, right); 3309 } 3310 } 3311 else { 3312 task(owner, &implT::forward_do_diff1, f, axis, key, left, center, right, TaskAttributes::hipri()); 3313 } 3314 return None; 3315 } 3316 3317 template <typename T, int NDIM> 3318 Void FunctionImpl<T,NDIM>::do_diff1(const implT* f, int axis, const keyT& key, 3319 const std::pair<keyT,tensorT>& left, 3320 const std::pair<keyT,tensorT>& center, 3321 const std::pair<keyT,tensorT>& right) { 3322 PROFILE_MEMBER_FUNC(FunctionImpl); 3323 typedef std::pair<keyT,tensorT> argT; 3324 3325 MADNESS_ASSERT(axis>=0 && axis<NDIM); 3326 3327 if (left.second.size==0 || right.second.size==0) { 3328 // One of the neighbors is below us in the tree ... recur down 3329 coeffs.replace(key,nodeT(tensorT(),true)); 3330 for (KeyChildIterator<NDIM> kit(key); kit; ++kit) { 3331 const keyT& child = kit.key(); 3332 if ((child.translation()[axis]&1) == 0) { 3333 // leftmost child automatically has right sibling 3334 forward_do_diff1(f, axis, child, left, center, center); 3335 } 3336 else { 3337 // rightmost child automatically has left sibling 3338 forward_do_diff1(f, axis, child, center, center, right); 3339 } 3340 } 3341 } 3342 else { 3343 forward_do_diff1(f, axis, key, left, center, right); 3344 } 3345 return None; 3346 } 3347 3348 template <typename T, int NDIM> 3349 Void FunctionImpl<T,NDIM>::do_diff2(const implT* f, int axis, const keyT& key, 3350 const std::pair<keyT,tensorT>& left, 3351 const std::pair<keyT,tensorT>& center, 3352 const std::pair<keyT,tensorT>& right) { 3353 PROFILE_MEMBER_FUNC(FunctionImpl); 3354 typedef std::pair<keyT,tensorT> argT; 3355 3356 tensorT d = madness::inner(cdata.rp, 3357 parent_to_child(left.second, left.first, neighbor(key,axis,-1)).swapdim(axis,0), 3358 1, 0); 3359 inner_result(cdata.r0, 3360 parent_to_child(center.second, center.first, key).swapdim(axis,0), 3361 1, 0, d); 3362 inner_result(cdata.rm, 3363 parent_to_child(right.second, right.first, neighbor(key,axis,1)).swapdim(axis,0), 3364 1, 0, d); 3365 if (axis) d = copy(d.swapdim(axis,0)); // make it contiguous 3366 d.scale(FunctionDefaults<NDIM>::get_rcell_width()[axis]*pow(2.0,(double) key.level())); 3367 coeffs.replace(key,nodeT(d,false)); 3368 return None; 3369 } 3370 3371 template <typename T, int NDIM> 3372 void FunctionImpl<T,NDIM>::mapdim(const implT& f, const std::vector<long>& map, bool fence) { 3373 PROFILE_MEMBER_FUNC(FunctionImpl); 3374 for(typename dcT::const_iterator it=f.coeffs.begin(); it!=f.coeffs.end(); ++it) { 3375 const keyT& key = it->first; 3376 const nodeT& node = it->second; 3377 3378 Vector<Translation,NDIM> l; 3379 for (int i=0; i<NDIM; i++) l[map[i]] = key.translation()[i]; 3380 tensorT c = node.coeff(); 3381 if (c.size) c = copy(c.mapdim(map)); 3382 3383 coeffs.replace(keyT(key.level(),l), nodeT(c,node.has_children())); 3384 } 3385 if (fence) world.gop.fence(); 3386 } 3387 3388 template <typename T, int NDIM> 3389 Future< Tensor<T> > FunctionImpl<T,NDIM>::compress_spawn(const Key<NDIM>& key, bool nonstandard, bool keepleaves) { 3390 PROFILE_MEMBER_FUNC(FunctionImpl); 3391 MADNESS_ASSERT(coeffs.probe(key)); 3392 nodeT& node = coeffs.find(key).get()->second; 3393 if (node.has_children()) { 3394 std::vector< Future<tensorT> > v = future_vector_factory<tensorT>(1<<NDIM); 3395 int i=0; 3396 for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) { 3397 PROFILE_BLOCK(compress_send); 3398 v[i] = task(coeffs.owner(kit.key()), &implT::compress_spawn, kit.key(), nonstandard, keepleaves, TaskAttributes::hipri()); 3399 } 3400 return task(world.rank(),&implT::compress_op, key, v, nonstandard); 3401 } 3402 else { 3403 Future<tensorT> result(node.coeff()); 3404 if (!keepleaves) node.clear_coeff(); 3405 return result; 3406 } 3407 } 3408 3409 template <typename T, int NDIM> 3410 Tensor<T> FunctionImpl<T,NDIM>::eval_plot_cube(const coordT& plotlo, 3411 const coordT& plothi, 3412 const std::vector<long>& npt) const { 3413 PROFILE_MEMBER_FUNC(FunctionImpl); 3414 Tensor<T> r(NDIM, &npt[0]); 3415 // r(___) = 99.0; 3416 MADNESS_ASSERT(!compressed); 3417 3418 coordT h; // Increment between points in each dimension 3419 for (int i=0; i<NDIM; i++) { 3420 if (npt[i] > 1) { 3421 h[i] = (plothi[i]-plotlo[i])/(npt[i]-1); 3422 } 3423 else { 3424 MADNESS_ASSERT(plotlo[i] == plothi[i]); 3425 h[i] = 0.0; 3426 } 3427 } 3428 //print("plot info", plotlo, plothi, npt, h); 3429 3430 // Loop thru local boxes ... THIS NEEDS MULTITHREADING !!! 3431 for(typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) { 3432 const keyT& key = it->first; 3433 const nodeT& node = it->second; 3434 if (node.has_coeff()) { 3435 //print("Looking at", key); 3436 // Determine the points, if any, of the plot grid that 3437 // are contained within this box 3438 coordT boxlo, boxhi; 3439 Vector<int,NDIM> boxnpt; 3440 double fac = pow(0.5,double(key.level())); 3441 int npttotal = 1; 3442 for (int d=0; d<NDIM; d++) { 3443 // Coords of box 3444 boxlo[d] = fac*key.translation()[d]; 3445 boxhi[d] = boxlo[d]+fac; 3446 3447 if (boxlo[d] > plothi[d] || boxhi[d] < plotlo[d]) { 3448 // Discard boxes out of the plot range 3449 npttotal = boxnpt[d] = 0; 3450 //print("OO range?"); 3451 break; 3452 } 3453 else if (npt[d] == 1) { 3454 // This dimension is only a single point 3455 boxlo[d] = boxhi[d] = plotlo[d]; 3456 boxnpt[d] = 1; 3457 } 3458 else { 3459 // Restrict to plot range 3460// boxlo[d] = std::max(boxlo[d],plotlo[d]); 3461// boxhi[d] = std::min(boxhi[d],plothi[d]); 3462 3463 // Round lo up to next plot point; round hi down 3464 double xlo = long((boxlo[d]-plotlo[d])/h[d])*h[d] + plotlo[d]; 3465 if (xlo < boxlo[d]) xlo += h[d]; 3466 boxlo[d] = xlo; 3467 double xhi = long((boxhi[d]-plotlo[d])/h[d])*h[d] + plotlo[d]; 3468 if (xhi > boxhi[d]) xhi -= h[d]; 3469 // MADNESS_ASSERT(xhi >= xlo); // nope 3470 boxhi[d] = xhi; 3471 boxnpt[d] = long(round((boxhi[d] - boxlo[d])/h[d])) + 1; 3472 } 3473 npttotal *= boxnpt[d]; 3474 } 3475 //print(" box", boxlo, boxhi, boxnpt, npttotal); 3476 if (npttotal > 0) { 3477 const tensorT& coeff = node.coeff(); 3478 const Level n = key.level(); 3479 const Vector<Translation,NDIM>& l = key.translation(); 3480 const double twon = pow(2.0,double(n)); 3481 long ind[NDIM]; 3482 coordT x; 3483 for (IndexIterator it(boxnpt); it; ++it) { 3484 for (int d=0; d<NDIM; d++) { 3485 double xd = boxlo[d] + it[d]*h[d]; // Sim. coords of point 3486 x[d] = twon*xd - l[d]; // Offset within box 3487 MADNESS_ASSERT(x[d]>=0.0 && x[d] <=1.0); // sanity 3488 if (npt[d] > 1) { 3489 ind[d] = long(round((xd-plotlo[d])/h[d])); // Index of plot point 3490 } 3491 else { 3492 ind[d] = 0; 3493 } 3494 MADNESS_ASSERT(ind[d]>=0 && ind[d]<npt[d]); // sanity 3495 } 3496 r(ind) = eval_cube(n, x, coeff); 3497 //print("computing", n, x, ind, r(ind)); 3498 } 3499 } 3500 } 3501 } 3502 3503 // ITERATOR(r, if (r(IND) == 99.0) {print("BAD", IND); error("bad",0);}); 3504 3505 return r; 3506 } 3507 3508 static void dxprintvalue(FILE* f, const double t) { 3509 fprintf(f,"%.6e\n",t); 3510 } 3511 3512 static void dxprintvalue(FILE* f, const double_complex& t) { 3513 fprintf(f,"%.6e %.6e\n", t.real(), t.imag()); 3514 } 3515 3516 template <typename T, int NDIM> 3517 void plotdx(const Function<T,NDIM>& function, 3518 const char* filename, 3519 const Tensor<double>& cell, 3520 const std::vector<long>& npt, 3521 bool binary) { 3522 PROFILE_FUNC; 3523 MADNESS_ASSERT(NDIM<=6); 3524 const char* element[6] = {"lines","quads","cubes","cubes4D","cubes5D","cubes6D"}; 3525 3526 function.verify(); 3527 World& world = const_cast< Function<T,NDIM>& >(function).world(); 3528 FILE *f=0; 3529 if (world.rank() == 0) { 3530 f = fopen(filename, "w"); 3531 if (!f) MADNESS_EXCEPTION("plotdx: failed to open the plot file", 0); 3532 3533 fprintf(f,"object 1 class gridpositions counts "); 3534 for (int d=0; d<NDIM; d++) fprintf(f," %ld",npt[d]); 3535 fprintf(f,"\n"); 3536 3537 fprintf(f,"origin "); 3538 for (int d=0; d<NDIM; d++) fprintf(f, " %.6e", cell(d,0)); 3539 fprintf(f,"\n"); 3540 3541 for (int d=0; d<NDIM; d++) { 3542 fprintf(f,"delta "); 3543 for (int c=0; c<d; c++) fprintf(f, " 0"); 3544 double h = 0.0; 3545 if (npt[d]>1) h = (cell(d,1)-cell(d,0))/(npt[d]-1); 3546 fprintf(f," %.6e", h); 3547 for (int c=d+1; c<NDIM; c++) fprintf(f, " 0"); 3548 fprintf(f,"\n"); 3549 } 3550 fprintf(f,"\n"); 3551 3552 fprintf(f,"object 2 class gridconnections counts "); 3553 for (int d=0; d<NDIM; d++) fprintf(f," %ld",npt[d]); 3554 fprintf(f,"\n"); 3555 fprintf(f, "attribute \"element type\" string \"%s\"\n", element[NDIM-1]); 3556 fprintf(f, "attribute \"ref\" string \"positions\"\n"); 3557 fprintf(f,"\n"); 3558 3559 int npoint = 1; 3560 for (int d=0; d<NDIM; d++) npoint *= npt[d]; 3561 const char* iscomplex = ""; 3562 if (TensorTypeData<T>::iscomplex) iscomplex = "category complex"; 3563 const char* isbinary = ""; 3564 if (binary) isbinary = "binary"; 3565 fprintf(f,"object 3 class array type double %s rank 0 items %d %s data follows\n", 3566 iscomplex, npoint, isbinary); 3567 } 3568 3569 world.gop.fence(); 3570 Tensor<T> r = function.eval_cube(cell, npt); 3571 3572 if (world.rank() == 0) { 3573 if (binary) { 3574 // This assumes that the values are double precision 3575 fflush(f); 3576 fwrite((void *) r.ptr(), sizeof(T), r.size, f); 3577 fflush(f); 3578 } 3579 else { 3580 for (IndexIterator it(npt); it; ++it) { 3581 //fprintf(f,"%.6e\n",r(*it)); 3582 dxprintvalue(f,r(*it)); 3583 } 3584 } 3585 fprintf(f,"\n"); 3586 3587 fprintf(f,"object \"%s\" class field\n",filename); 3588 fprintf(f,"component \"positions\" value 1\n"); 3589 fprintf(f,"component \"connections\" value 2\n"); 3590 fprintf(f,"component \"data\" value 3\n"); 3591 fprintf(f,"\nend\n"); 3592 fclose(f); 3593 } 3594 world.gop.fence(); 3595 } 3596 3597 template <int NDIM> 3598 void FunctionDefaults<NDIM>::set_defaults (World& world) { 3599 k = 7; 3600 thresh = 1e-5; 3601 initial_level = 2; 3602 max_refine_level = 30; 3603 truncate_mode = 0; 3604 refine = true; 3605 autorefine = true; 3606 debug = false; 3607 truncate_on_project = false; 3608 apply_randomize = false; 3609 project_randomize = false; 3610 bc = Tensor<int>(NDIM,2); 3611 cell = Tensor<double>(NDIM,2); 3612 cell(_,1) = 1.0; 3613 recompute_cell_info(); 3614 3615 //pmap = SharedPtr< WorldDCPmapInterface< Key<NDIM> > >(new WorldDCDefaultPmap< Key<NDIM> >(world)); 3616 pmap = SharedPtr< WorldDCPmapInterface< Key<NDIM> > >(new MyPmap<NDIM>(world)); 3617 //pmap = SharedPtr< WorldDCPmapInterface< Key<NDIM> > >(new SimpleMap< Key<NDIM> >(world)); 3618 } 3619 3620 3621 // 3622 // Below here we instantiate templates defined in this file 3623 // 3624 3625 3626 template <typename T, int NDIM> 3627 FunctionCommonData<T,NDIM> FunctionCommonData<T,NDIM>::data[MAXK+1]; 3628 3629 template <int NDIM> int FunctionDefaults<NDIM>::k; 3630 template <int NDIM> double FunctionDefaults<NDIM>::thresh; 3631 template <int NDIM> int FunctionDefaults<NDIM>::initial_level; 3632 template <int NDIM> int FunctionDefaults<NDIM>::max_refine_level; 3633 template <int NDIM> int FunctionDefaults<NDIM>::truncate_mode; 3634 template <int NDIM> bool FunctionDefaults<NDIM>::refine; 3635 template <int NDIM> bool FunctionDefaults<NDIM>::autorefine; 3636 template <int NDIM> bool FunctionDefaults<NDIM>::debug; 3637 template <int NDIM> bool FunctionDefaults<NDIM>::truncate_on_project; 3638 template <int NDIM> bool FunctionDefaults<NDIM>::apply_randomize; 3639 template <int NDIM> bool FunctionDefaults<NDIM>::project_randomize; 3640 template <int NDIM> Tensor<int> FunctionDefaults<NDIM>::bc; 3641 template <int NDIM> Tensor<double> FunctionDefaults<NDIM>::cell; 3642 template <int NDIM> Tensor<double> FunctionDefaults<NDIM>::cell_width; 3643 template <int NDIM> Tensor<double> FunctionDefaults<NDIM>::rcell_width; 3644 template <int NDIM> double FunctionDefaults<NDIM>::cell_volume; 3645 template <int NDIM> double FunctionDefaults<NDIM>::cell_min_width; 3646 template <int NDIM> SharedPtr< WorldDCPmapInterface< Key<NDIM> > > FunctionDefaults<NDIM>::pmap; 3647 3648 template <int NDIM> std::vector< Key<NDIM> > Displacements<NDIM>::disp; 3649 template <int NDIM> std::vector< Key<NDIM> > Displacements<NDIM>::disp_periodicsum[64]; 3650 3651#ifdef FUNCTION_INSTANTIATE_1 3652 template class FunctionDefaults<1>; 3653 template class Function<double, 1>; 3654 template class Function<std::complex<double>, 1>; 3655 template class FunctionImpl<double, 1>; 3656 template class FunctionImpl<std::complex<double>, 1>; 3657 template class FunctionCommonData<double, 1>; 3658 template class FunctionCommonData<double_complex, 1>; 3659 template class Displacements<1>; 3660 3661 template void plotdx<double,1>(const Function<double,1>&, const char*, const Tensor<double>&, 3662 const std::vector<long>&, bool binary); 3663 template void plotdx<double_complex,1>(const Function<double_complex,1>&, const char*, const Tensor<double>&, 3664 const std::vector<long>&, bool binary); 3665#endif 3666 3667#ifdef FUNCTION_INSTANTIATE_2 3668 template class FunctionDefaults<2>; 3669 template class Function<double, 2>; 3670 template class Function<std::complex<double>, 2>; 3671 template class FunctionImpl<double, 2>; 3672 template class FunctionImpl<std::complex<double>, 2>; 3673 template class FunctionCommonData<double, 2>; 3674 template class FunctionCommonData<double_complex, 2>; 3675 template class Displacements<2>; 3676 3677 template void plotdx<double,2>(const Function<double,2>&, const char*, const Tensor<double>&, 3678 const std::vector<long>&, bool binary); 3679 template void plotdx<double_complex,2>(const Function<double_complex,2>&, const char*, const Tensor<double>&, 3680 const std::vector<long>&, bool binary); 3681#endif 3682 3683#ifdef FUNCTION_INSTANTIATE_3 3684 template class FunctionDefaults<3>; 3685 template class Function<double, 3>; 3686 template class Function<std::complex<double>, 3>; 3687 template class FunctionImpl<double, 3>; 3688 template class FunctionImpl<std::complex<double>, 3>; 3689 template class FunctionCommonData<double, 3>; 3690 template class FunctionCommonData<double_complex, 3>; 3691 template class Displacements<3>; 3692 3693 template void plotdx<double,3>(const Function<double,3>&, const char*, const Tensor<double>&, 3694 const std::vector<long>&, bool binary); 3695 template void plotdx<double_complex,3>(const Function<double_complex,3>&, const char*, const Tensor<double>&, 3696 const std::vector<long>&, bool binary); 3697#endif 3698 3699#ifdef FUNCTION_INSTANTIATE_4 3700 template class FunctionDefaults<4>; 3701 template class Function<double, 4>; 3702 template class Function<std::complex<double>, 4>; 3703 template class FunctionImpl<double, 4>; 3704 template class FunctionImpl<std::complex<double>, 4>; 3705 template class FunctionCommonData<double, 4>; 3706 template class FunctionCommonData<double_complex, 4>; 3707 template class Displacements<4>; 3708 3709 template void plotdx<double,4>(const Function<double,4>&, const char*, const Tensor<double>&, 3710 const std::vector<long>&, bool binary); 3711 template void plotdx<double_complex,4>(const Function<double_complex,4>&, const char*, const Tensor<double>&, 3712 const std::vector<long>&, bool binary); 3713#endif 3714 3715#ifdef FUNCTION_INSTANTIATE_5 3716 template class FunctionDefaults<5>; 3717 template class Function<double, 5>; 3718 template class Function<std::complex<double>, 5>; 3719 template class FunctionImpl<double, 5>; 3720 template class FunctionImpl<std::complex<double>, 5>; 3721 template class FunctionCommonData<double, 5>; 3722 template class FunctionCommonData<double_complex, 5>; 3723 template class Displacements<5>; 3724 3725 template void plotdx<double,5>(const Function<double,5>&, const char*, const Tensor<double>&, 3726 const std::vector<long>&, bool binary); 3727 template void plotdx<double_complex,5>(const Function<double_complex,5>&, const char*, const Tensor<double>&, 3728 const std::vector<long>&, bool binary); 3729#endif 3730 3731#ifdef FUNCTION_INSTANTIATE_6 3732 template class FunctionDefaults<6>; 3733 template class Function<double, 6>; 3734 template class Function<std::complex<double>, 6>; 3735 template class FunctionImpl<double, 6>; 3736 template class FunctionImpl<std::complex<double>, 6>; 3737 template class FunctionCommonData<double, 6>; 3738 template class FunctionCommonData<double_complex, 6>; 3739 template class Displacements<6>; 3740 3741 template void plotdx<double,6>(const Function<double,6>&, const char*, const Tensor<double>&, 3742 const std::vector<long>&, bool binary); 3743 template void plotdx<double_complex,6>(const Function<double_complex,6>&, const char*, const Tensor<double>&, 3744 const std::vector<long>&, bool binary); 3745#endif 3746 3747 template <> 3748 ConcurrentHashMap< double, SharedPtr< GaussianConvolution1D<double> > > GaussianConvolution1DCache<double>::map = ConcurrentHashMap< double, SharedPtr< GaussianConvolution1D<double> > >(); 3749 3750 template <> 3751 ConcurrentHashMap< double, SharedPtr< GaussianConvolution1D<double_complex> > > GaussianConvolution1DCache<double_complex>::map = ConcurrentHashMap< double, SharedPtr< GaussianConvolution1D<double_complex> > >(); 3752} 3753 3754/// Quietly used as a global lock when looking for bugs with multiple threads 3755madness::Mutex THELOCK; 3756 3757