1//         /// Returns vector of keys of all local nodes where predicate is true
2//         template <typename predicateT>
3//         std::vector<keyT> keys (const predicateT& predicate) {
4//             std::vector<keyT> v;
5//             v.reserve(this->size());
6//             for (typename implT::iterator it = this->begin(); it != this->end(); ++it) {
7//                 if (predicate(*it)) v.push_back(it->first);
8//             }
9//             return v;
10//         };
11
12//         /// Returns vector of keys of all local nodes
13//         std::vector<keyT> keys () {
14//             return keys(true_predicate<datumT>());
15//         };
16
17
18//         /// Returns vector of keys of local leaf nodes
19//         std::vector<keyT> leaf_nodes () {
20//             return keys(is_leaf_predicate());
21//         };
22
23
24//         /// Returns vector of keys of all local nodes with coeff
25//         std::vector<keyT> coeff_nodes () {
26//             return keys(has_coeff_predicate());
27//         };
28
29
30        struct true_predicate {
31            bool operator()(const datumT& d) const {
32                return true;
33            };
34        };
35
36
37        struct is_leaf_predicate {
38            bool operator()(const datumT& d) const {
39                return d.second.is_leaf();
40            };
41        };
42
43        struct is_not_leaf_predicate {
44            bool operator()(const datumT& d) const {
45                return !d.second.is_leaf();
46            };
47        };
48
49        struct  has_coeff_predicate {
50            bool operator()(const datumT& d) const {
51                return d.second.has_coeff();
52            };
53        };
54
55        struct  has_no_coeff_predicate {
56            bool operator()(const datumT& d) const {
57                return !d.second.has_coeff();
58            };
59        };
60
61
62
63//         void insert_weird_tree(const keyT& key) {
64// //       for (Level i = 0; i < key.level(); i++) cout << "  ";
65// //       print(key);
66//          Level Nmax = 1;
67//          if (is_local(key)) {
68//              bool has_children = ((key.level() < Nmax) || ((key.level() < 2*Nmax)&&
69//                      (key.translation()[0] == key.translation()[NDIM-1])));
70//              FunctionNode<T,NDIM> node(tensorT(0), has_children);
71//              this->insert(key,node);
72//              if (has_children) {
73//                     for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
74//                      insert_weird_tree(kit.key());
75//                     }
76//              }
77//          }
78//          else if (key.level() < Nmax) {
79//              for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
80//                  insert_weird_tree(kit.key());
81//              }
82//          }
83//         };
84
85
86        /// Optimized filter (inplace, contiguous, no err checking)
87
88        /// Transforms coefficients in s returning result also in s.
89        /// Uses work2 from common data to eliminate temporary creation and
90        /// to increase cache locality.
91        ///
92        /// No communication involved.
93//         inline void filter_inplace(tensorT& s) {
94//             transform_inplace(s, cdata.hgT, cdata.work2);
95//         };
96
97
98        /// Optimized unfilter (see info about filter_inplace)
99
100        /// No communication involved.
101//         inline void unfilter_inplace(tensorT& s) {
102//             transform_inplace(s, cdata.hg, cdata.work2);
103//         };
104//#define WORLD_INSTANTIATE_STATIC_TEMPLATES
105#include <madness/mra/loadbal.h>
106
107using namespace std;
108
109namespace madness {
110
111    typedef int Cost;
112    typedef double CompCost;
113
114    template <typename T, int D>
115    vector<typename DClass<D>::TreeCoords> LoadBalImpl<T,D>::findBestPartition() {
116        vector<typename DClass<D>::TreeCoords> klist;
117        if (this->f.impl->world.mpi.rank() != 0) {
118            print("findBestPartition: leave it to the expert");
119            this->f.impl->world.gop.fence();
120            print("about to do broadcast");
121            unsigned int ksize;
122            this->f.impl->world.gop.template broadcast<unsigned int>(ksize);
123            for (unsigned int i = 0; i < ksize; i++) {
124                typename DClass<D>::TreeCoords t;
125                this->f.impl->world.gop.template broadcast<typename DClass<D>::TreeCoords>(t);
126                klist.push_back(t);
127            }
128            print("done with broadcast");
129            return klist;
130        }
131        unsigned int npieces = this->f.impl->world.nproc();
132        bool notdone = true;
133        int count = 0;
134        vector<vector<typename DClass<D>::TreeCoords> > listoflist;
135        vector<typename DClass<D>::TreeCoords> emptylist;
136        vector<Cost> costlist;
137
138        listoflist.push_back(emptylist);
139        costlist.push_back(0);
140        Cost totalCost = 0;
141
142<<<<<<< .mine
143        madness::print("findBestPartition: about to fixCost");
144=======
145//madness::print("findBestPartition: about to fixCost");
146>>>>>>> .r223
147
148<<<<<<< .mine
149        typename DClass<D>::KeyD root(0);
150        this->skeltree->template fixCost(root);
151        madness::print("findBestPartition: about to depthFirstPartition");
152        //    this->skeltree->print(root);
153        totalCost = this->skeltree->template depthFirstPartition(root, &listoflist[count], npieces,
154                    totalCost, &costlist[count]);
155        //madness::print("findBestPartition: after depthFirstPartition");
156        int size = listoflist[count].size();
157        cout << "Partitioned tree " << count << ":" << endl;
158        for (int i = 0; i < size; i++)
159            listoflist[count][i].print();
160        cout << "Max cost for this tree = " << costlist[count] << endl;
161        cout << endl;
162        if (listoflist[count].size() < npieces)
163            notdone = false;
164        count++;
165=======
166    typename DClass<D>::KeyD root(0);
167    this->skeltree->fixCost(root);
168//madness::print("findBestPartition: about to depthFirstPartition");
169//    this->skeltree->print(root);
170    totalCost = this->skeltree->depthFirstPartition(root, &listoflist[count], npieces,
171	totalCost, &costlist[count]);
172//madness::print("findBestPartition: after depthFirstPartition");
173    int size = listoflist[count].size();
174    cout << "Partitioned tree " << count << ":" << endl;
175    for (int i = 0; i < size; i++)
176	listoflist[count][i].print();
177    cout << "Max cost for this tree = " << costlist[count] << endl;
178    cout << endl;
179    if (listoflist[count].size() < npieces)
180	notdone = false;
181    count++;
182>>>>>>> .r223
183
184<<<<<<< .mine
185        while (notdone) {
186            //	this->skeltree.fixCost<D>(root);
187            this->skeltree->template fixCost(root);
188            //	this->skeltree.rollup<D>(root);
189            this->skeltree->template rollup(root);
190            listoflist.push_back(emptylist);
191            costlist.push_back(0);
192            this->skeltree->template depthFirstPartition(root, &listoflist[count], npieces, totalCost, &costlist[count]);
193            int size = listoflist[count].size();
194            cout << "Partitioned tree " << count << ":" << endl;
195            for (int i = 0; i < size; i++)
196                listoflist[count][i].print();
197            cout << "Max cost for this tree = " << costlist[count] << endl;
198            cout << endl;
199=======
200    while (notdone) {
201	this->skeltree->fixCost(root);
202	this->skeltree->rollup(root);
203	listoflist.push_back(emptylist);
204	costlist.push_back(0);
205	this->skeltree->depthFirstPartition(root, &listoflist[count], npieces, totalCost, &costlist[count]);
206	int size = listoflist[count].size();
207	cout << "Partitioned tree " << count << ":" << endl;
208	for (int i = 0; i < size; i++)
209	    listoflist[count][i].print();
210	cout << "Max cost for this tree = " << costlist[count] << endl;
211	cout << endl;
212
213    	typename DClass<D>::treeT::iterator it = this->skeltree->find(root);
214    	if (it == this->skeltree->end()) return klist;
215    	typename DClass<D>::NodeD node = it->second;
216	if (!(node.has_children()) || (listoflist[count].size() < npieces)) {
217	    notdone = false;
218	}
219	if (listoflist[count].size() < npieces) {
220	    listoflist.erase(listoflist.begin()+count);
221	    break;
222	}
223	count++;
224    }
225    unsigned int shortestList = 0, SL_index, LB_index;
226    Cost loadBalCost = 0;
227    vector<unsigned int> len;
228    for (int i = 0; i < count; i++) {
229	len.push_back(listoflist[i].size());
230	if ((len[i] < shortestList) || (shortestList == 0)) {
231	    shortestList = len[i];
232	    SL_index = i;
233	}
234	else if ((len[i] == shortestList) && (costlist[i] < costlist[SL_index])) {
235	// all things being equal, prefer better balance
236	    shortestList = len[i];
237	    SL_index = i;
238	}
239	if ((costlist[i] < loadBalCost) || (loadBalCost == 0)) {
240	    loadBalCost = costlist[i];
241	    LB_index = i;
242	}
243	else if ((costlist[i] == loadBalCost) && (len[i] < listoflist[LB_index].size())) {
244	// all things being equal, prefer fewer cuts
245	    loadBalCost = costlist[i];
246	    LB_index = i;
247	}
248    }
249>>>>>>> .r223
250
251            //    	typename DClass<D>::treeT::iterator it = this->skeltree.find(root);
252            typename DClass<D>::treeT::iterator it = this->skeltree->find(root);
253            //    	if (it == this->skeltree.end()) return klist;
254            if (it == this->skeltree->end()) return klist;
255            typename DClass<D>::NodeD node = it->second;
256            if (!(node.has_children()) || (listoflist[count].size() < npieces)) {
257                notdone = false;
258            }
259            if (listoflist[count].size() < npieces) {
260                listoflist.erase(listoflist.begin()+count);
261                break;
262            }
263            count++;
264        }
265        unsigned int shortestList = 0, SL_index, LB_index;
266        Cost loadBalCost = 0;
267        vector<unsigned int> len;
268        for (int i = 0; i < count; i++) {
269            len.push_back(listoflist[i].size());
270            if ((len[i] < shortestList) || (shortestList == 0)) {
271                shortestList = len[i];
272                SL_index = i;
273            } else if ((len[i] == shortestList) && (costlist[i] < costlist[SL_index])) {
274                // all things being equal, prefer better balance
275                shortestList = len[i];
276                SL_index = i;
277            }
278            if ((costlist[i] < loadBalCost) || (loadBalCost == 0)) {
279                loadBalCost = costlist[i];
280                LB_index = i;
281            } else if ((costlist[i] == loadBalCost) && (len[i] < listoflist[LB_index].size())) {
282                // all things being equal, prefer fewer cuts
283                loadBalCost = costlist[i];
284                LB_index = i;
285            }
286        }
287
288        cout << "The load balance with the fewest broken links has cost " << costlist[SL_index] <<
289        ", and " << shortestList-1 << " broken links" << endl;
290        for (unsigned int i = 0; i < shortestList; i++) {
291            listoflist[SL_index][i].print();
292        }
293        cout << endl;
294        cout << "The load balance with the best balance has cost " << loadBalCost << ", and " <<
295        listoflist[LB_index].size()-1 << " broken links" << endl;
296        for (unsigned int i = 0; i < listoflist[LB_index].size(); i++) {
297            listoflist[LB_index][i].print();
298        }
299        cout << endl;
300
301        CompCost ccleast = 0;
302        int cc_index;
303        for (int i = 0; i < count; i++) {
304            CompCost cctmp = computeCompCost(costlist[i], len[i]-1);
305            if ((i==0) || (cctmp < ccleast)) {
306                ccleast = cctmp;
307                cc_index = i;
308            }
309        }
310        cout << "The load balance with the best overall computational cost has cost " <<
311        costlist[cc_index] << " and " << len[cc_index]-1 << " broken links" << endl;
312        for (unsigned int i = 0; i < len[cc_index]; i++) {
313            listoflist[cc_index][i].print();
314        }
315        for (unsigned int i = 0; i < len[cc_index]; i++) {
316            klist.push_back(listoflist[cc_index][i]);
317        }
318
319        print("findBestPartition: about to do fence");
320        this->f.impl->world.gop.fence();
321        print("about to do broadcast");
322        unsigned int ksize = klist.size();
323        this->f.impl->world.gop.template broadcast<unsigned int>(ksize);
324        for (unsigned int i=0; i < ksize; i++) {
325            this->f.impl->world.gop.template broadcast<typename DClass<D>::TreeCoords>(klist[i]);
326        }
327        print("done with broadcast");
328
329        return klist;
330    }
331
332<<<<<<< .mine
333=======
334template <int D, typename Pmap>
335Cost LBTree<D,Pmap>::fixCost(typename DClass<D>::KeyDConst& key) {
336//    madness::print("fixCost: key =", key, " is about to be looked for");
337    typename DClass<D>::treeT::iterator it = this->find(key);
338//    madness::print("fixCost: key =", key, " was found (looked for),", (it == this->end()));
339    if (it == this->end()) return 0;
340//    madness::print("fixCost: tree it was found (exists)");
341>>>>>>> .r223
342
343<<<<<<< .mine
344    template <int D>
345    Cost LBTree<D>::fixCost(typename DClass<D>::KeyDConst& key) {
346        madness::print("fixCost: key =", key, " is about to be looked for");
347        typename DClass<D>::treeT::iterator it = this->find(key);
348        madness::print("fixCost: key =", key, " was found (looked for),", (it == this->end()));
349        if (it == this->end()) return 0;
350        madness::print("fixCost: tree it was found (exists)");
351
352        typename DClass<D>::NodeD node = it->second;
353        madness::print("fixCost: got node");
354        NodeData d = node.get_data();
355        madness::print("fixCost: got data from node");
356        d.subcost = d.cost;
357        madness::print("fixCost: assigned node cost to subcost");
358        if (node.has_children()) {
359            madness::print("fixCost: node has children");
360            for (KeyChildIterator<D> kit(key); kit; ++kit) {
361                d.subcost += this->template fixCost(kit.key());
362            }
363        }
364        node.set_data(d);
365        madness::print("fixCost: about to insert key =", key, ",", node.get_data());
366        this->insert(key,node);
367        madness::print("fixCost: inserted node");
368        return d.subcost;
369=======
370    typename DClass<D>::NodeD node = it->second;
371//    madness::print("fixCost: got node");
372    NodeData d = node.get_data();
373//    madness::print("fixCost: got data from node");
374    d.subcost = d.cost;
375//    madness::print("fixCost: assigned node cost to subcost");
376    if (node.has_children())
377    {
378//	madness::print("fixCost: node has children");
379	for (KeyChildIterator<D> kit(key); kit; ++kit) {
380	    d.subcost += this->fixCost(kit.key());
381	}
382>>>>>>> .r223
383    }
384<<<<<<< .mine
385=======
386    node.set_data(d);
387//madness::print("fixCost: about to insert key =", key, ",", node.get_data());
388    this->insert(key,node);
389//madness::print("fixCost: inserted node");
390    return d.subcost;
391}
392>>>>>>> .r223
393
394
395    template <int D>
396    Cost LBTree<D>::depthFirstPartition(typename DClass<D>::KeyDConst& key,
397            vector<typename DClass<D>::TreeCoords>* klist, unsigned int npieces,
398            Cost totalcost, Cost *maxcost) {
399//madness::print("depthFirstPartition: at very beginning");
400<<<<<<< .mine
401        if (totalcost == 0) {
402            totalcost = this->template computeCost(key);
403        }
404        madness::print("depthFirstPartition: totalcost =", totalcost);
405=======
406    if (totalcost == 0) {
407	totalcost = this->computeCost(key);
408    }
409//madness::print("depthFirstPartition: totalcost =", totalcost);
410>>>>>>> .r223
411
412        Cost costLeft = totalcost;
413        int partsLeft = npieces;
414        *maxcost = 0;
415        Cost partitionSize = 0;
416    double facter = 1.1;
417
418<<<<<<< .mine
419        for (int i = npieces-1; i >= 0; i--) {
420            cout << endl << "Beginning partition number " << i << endl;
421            vector<typename DClass<D>::KeyD> tmplist;
422            Cost tpart = computePartitionSize(costLeft, partsLeft);
423            if (tpart > partitionSize) {
424                partitionSize = tpart;
425            }
426            madness::print("depthFirstPartition: partitionSize =", partitionSize);
427            Cost usedUp = 0;
428            bool atleaf = false;
429            usedUp = this->template makePartition(key, &tmplist, partitionSize, (i==0), usedUp, &atleaf);
430            if (*maxcost < usedUp) *maxcost = usedUp;
431            costLeft -= usedUp;
432            partsLeft--;
433            for (unsigned int j = 0; j < tmplist.size(); j++) {
434                klist->push_back(typename DClass<D>::TreeCoords(typename DClass<D>::KeyD(tmplist[j]), i));
435            }
436        }
437        return totalcost;
438=======
439    for (int i = npieces-1; i >= 0; i--) {
440	cout << endl << "Beginning partition number " << i << endl;
441	vector<typename DClass<D>::KeyD> tmplist;
442	Cost tpart = computePartitionSize(costLeft, partsLeft);
443	if ((tpart > partitionSize) || (tpart*facter < partitionSize)) {
444	    partitionSize = tpart;
445	}
446//madness::print("depthFirstPartition: partitionSize =", partitionSize);
447	Cost usedUp = 0;
448	bool atleaf = false;
449	usedUp = this->makePartition(key, &tmplist, partitionSize, (i==0), usedUp, &atleaf);
450	if (*maxcost < usedUp) *maxcost = usedUp;
451	costLeft -= usedUp;
452	partsLeft--;
453	for (unsigned int j = 0; j < tmplist.size(); j++) {
454	    klist->push_back(typename DClass<D>::TreeCoords(typename DClass<D>::KeyD(tmplist[j]), i));
455	}
456>>>>>>> .r223
457    }
458
459    template <int D>
460    void LBTree<D>::rollup(typename DClass<D>::KeyDConst& key) {
461//    madness::print("rollup: at beginning");
462        typename DClass<D>::treeT::iterator it = this->find(key);
463        if (it == this->end()) return;
464
465//    madness::print("rollup: about to get node associated with key",key);
466        typename DClass<D>::NodeD node = it->second;
467        if (!node.has_children()) {
468//	madness::print("rollup: this node has no children; returning");
469            return; // no rolling to be done here.
470        }
471//    madness::print("rollup: this node has children");
472        bool hasleafchild = false;
473        for (KeyChildIterator<D> kit(key); kit; ++kit) {
474            typename DClass<D>::treeT::iterator itc = this->find(kit.key());
475            if (itc != this->end()) {
476//	    madness::print("rollup: found child", kit.key());
477                typename DClass<D>::NodeD c = itc->second;
478                if (!c.has_children()) {
479//		madness::print("rollup: child is leaf");
480                    hasleafchild = true;
481                    break;
482                } else {
483//		madness::print("rollup: child", kit.key(), "has children");
484                }
485            }
486        }
487        if (hasleafchild) {
488//	madness::print("rollup: about to meld with key",key);
489<<<<<<< .mine
490            this->template meld(key);
491        }
492        for (KeyChildIterator<D> kit(key); kit; ++kit) {
493            typename DClass<D>::treeT::iterator itc = this->find(kit.key());
494            if (itc != this->end()) {
495=======
496	this->meld(key);
497    }
498    for (KeyChildIterator<D> kit(key); kit; ++kit) {
499	typename DClass<D>::treeT::iterator itc = this->find(kit.key());
500	if (itc != this->end()) {
501>>>>>>> .r223
502//	    madness::print("rollup: found child", kit.key());
503                typename DClass<D>::NodeD c = itc->second;
504                if (c.has_children()) {
505//		madness::print("rollup: child", kit.key(), "has children");
506<<<<<<< .mine
507                    this->template rollup(kit.key());
508                }
509            }
510        }
511        it = this->find(key);
512        node = it->second;
513        NodeData d = node.get_data();
514        if (d.istaken) {
515            d.istaken = false;
516            node.set_data(d);
517            this->insert(key,node);
518        }
519=======
520		this->rollup(kit.key());
521	    }
522	}
523>>>>>>> .r223
524    }
525
526    template <int D>
527    void LBTree<D>::meld(typename DClass<D>::KeyDConst& key) {
528//    madness::print("meld: at beginning, finding key", key);
529        Cost cheapest = 0;
530        typename DClass<D>::treeT::iterator it = this->find(key);
531        if (it == this->end()) return;
532
533        vector<unsigned int> mylist;
534
535        typename DClass<D>::NodeD node = it->second;
536        unsigned int i = 0;
537//    madness::print("meld: about to iterate over children of key", key);
538        for (KeyChildIterator<D> kit(key); kit; ++kit) {
539//    	madness::print("    meld: iterating over child", i);
540            if (node.has_child(i)) {
541                typename DClass<D>::treeT::iterator itc = this->find(kit.key());
542                if (itc == this->end()) return;
543                typename DClass<D>::NodeD c = itc->second;
544                if (!c.has_children()) {
545//		madness::print("    meld: child",i,"has no children");
546                    Cost cost = c.get_data().cost;
547                    if ((cost < cheapest) || (cheapest == 0)) {
548                        cheapest = cost;
549                        mylist.clear();
550                        mylist.push_back(i);
551                    } else if (cost == cheapest) {
552                        mylist.push_back(i);
553                    }
554                }
555            }
556            i++;
557        }
558
559        if (cheapest == 0) {
560//	madness::print("meld: this node has no leaf children");
561            NodeData d = node.get_data();
562            d.istaken = false;
563            node.set_data(d);
564            this->insert(key,node);
565            return;
566        }
567
568        NodeData d = node.get_data();
569
570        i = 0;
571        int j = 0, mlsize = mylist.size();
572        for (KeyChildIterator<D> kit(key); kit; ++kit) {
573            if (mylist[j] == i) {
574//	    madness::print("meld: found a match, mylist[",j,"] =",i);
575                this->erase(kit.key());
576                node.set_child(mylist[j], false);
577                d.cost += cheapest;
578                j++;
579            }
580            i++;
581            if (j == mlsize) break;
582        }
583        d.istaken = false;
584        node.set_data(d);
585        this->insert(key,node);
586//    madness::print("meld: inserted node back into tree; goodbye!");
587    }
588
589
590    template <int D>
591    Cost LBTree<D>::computeCost(typename DClass<D>::KeyDConst& key) {
592        Cost cost = 0;
593        typename DClass<D>::treeT::iterator it = this->find(key);
594        if (it == this->end()) return cost;
595
596<<<<<<< .mine
597        typename DClass<D>::NodeD node = it->second;
598        for (KeyChildIterator<D> kit(key); kit; ++kit) {
599            cost += this->template computeCost(kit.key());
600        }
601        NodeData d = node.get_data();
602        cost += d.cost;
603
604        d.subcost = cost;
605        node.set_data(d);
606        this->insert(key,node);
607        return cost;
608=======
609    typename DClass<D>::NodeD node = it->second;
610    for (KeyChildIterator<D> kit(key); kit; ++kit) {
611	cost += this->computeCost(kit.key());
612>>>>>>> .r223
613    }
614
615
616    template <int D>
617    Cost LBTree<D>::makePartition(typename DClass<D>::KeyDConst& key,
618                                       vector<typename DClass<D>::KeyD>* klist, Cost partitionSize, bool lastPartition,
619                                       Cost usedUp, bool *atleaf) {
620//    madness::print("at beginning of makePartition: atleaf =", *atleaf);
621        double fudgeFactor = 0.1;
622        Cost maxAddl = (Cost) (fudgeFactor*partitionSize);
623
624        typename DClass<D>::treeT::iterator it = this->find(key);
625        if (it == this->end()) {
626            return usedUp;
627        }
628
629        typename DClass<D>::NodeD node = it->second;
630        NodeData d = node.get_data();
631
632        it = this->end();
633
634<<<<<<< .mine
635        madness::print("makePartition: data for key", key, ":", d);
636        madness::print("makePartition: partitionSize =", partitionSize, ", lastPartition =", lastPartition, ", usedUp =", usedUp);
637=======
638//    madness::print("makePartition: data for key", key, ":", d);
639//    madness::print("makePartition: partitionSize =", partitionSize, ", lastPartition =", lastPartition, ", usedUp =", usedUp);
640>>>>>>> .r223
641
642<<<<<<< .mine
643        if (d.istaken) {
644            madness::print("makePartition: this key is taken");
645            return usedUp;
646        }
647=======
648    if (d.istaken) {
649//	madness::print("makePartition: this key is taken");
650	return usedUp;
651    }
652>>>>>>> .r223
653
654<<<<<<< .mine
655        madness::print("makePartition: back to key", key);
656=======
657//    madness::print("makePartition: back to key", key);
658>>>>>>> .r223
659
660<<<<<<< .mine
661        // if either we're at the last partition, the partition is currently empty
662        // and this is a single item, or there is still room in the partition and
663        // adding this to it won't go above the fudge factor,
664        // then add this piece to the partition.
665        if ((lastPartition) || ((usedUp == 0) && (!node.has_children())) ||
666                ((usedUp < partitionSize) && (d.subcost+usedUp <= partitionSize+maxAddl))) {
667            // add to partition
668            madness::print("makePartition: adding to partition", key);
669            klist->push_back(typename DClass<D>::KeyD(key));
670            d.istaken = true;
671            usedUp += d.subcost;
672            // REMOVE COST FROM FOREPARENTS (implement this)
673            this->template removeCost(key.parent(), d.subcost);
674            node.set_data(d);
675            this->insert(key,node);
676        } else if (usedUp < partitionSize) {
677            // try this node's children (if any)
678            if (node.has_children()) {
679                int i = 0;
680                for (KeyChildIterator<D> kit(key); kit; ++kit) {
681                    if (node.has_child(i)) {
682                        madness::print("makePartition:", key, "recursively calling", kit.key());
683                        usedUp = this->template makePartition(kit.key(), klist, partitionSize, lastPartition,
684                                                              usedUp, atleaf);
685                        if ((*atleaf) || (usedUp >= partitionSize)) {
686                            break;
687                        }
688                    }
689                    i++;
690                }
691            } else {
692                madness::print("makePartition: about to set atleaf = true");
693                *atleaf = true;
694            }
695        }
696        return usedUp;
697=======
698    // if either we're at the last partition, the partition is currently empty
699    // and this is a single item, or there is still room in the partition and
700    // adding this to it won't go above the fudge factor,
701    // then add this piece to the partition.
702    if ((lastPartition) || ((usedUp == 0) && (!node.has_children())) ||
703	((usedUp < partitionSize) && (d.subcost+usedUp <= partitionSize+maxAddl))) {
704	// add to partition
705//	madness::print("makePartition: adding to partition", key);
706	klist->push_back(typename DClass<D>::KeyD(key));
707	d.istaken = true;
708	usedUp += d.subcost;
709	// REMOVE COST FROM FOREPARENTS (implement this)
710	this->removeCost(key.parent(), d.subcost);
711	node.set_data(d);
712	this->insert(key,node);
713>>>>>>> .r223
714    }
715<<<<<<< .mine
716=======
717    else if (usedUp < partitionSize) {
718	// try this node's children (if any)
719	if (node.has_children()) {
720	    int i = 0;
721	    for (KeyChildIterator<D> kit(key); kit; ++kit) {
722		if (node.has_child(i)) {
723//		    madness::print("makePartition:", key, "recursively calling", kit.key());
724		    usedUp = this->makePartition(kit.key(), klist, partitionSize, lastPartition,
725			usedUp, atleaf);
726		    if ((*atleaf) || (usedUp >= partitionSize)) {
727			break;
728		    }
729		}
730		i++;
731	    }
732	}
733	else {
734//	    madness::print("makePartition: about to set atleaf = true");
735	    *atleaf = true;
736	}
737    }
738    return usedUp;
739}
740>>>>>>> .r223
741
742<<<<<<< .mine
743    template <int D>
744    void LBTree<D>::removeCost(typename DClass<D>::KeyDConst& key, Cost c) {
745        madness::print("removeCost: key", key, "owner =", owner(key));
746        this->get_mypmap().print();
747        if (((int) key.level()) < 0) return;
748        typename DClass<D>::treeT::iterator it = this->find(key);
749        madness::print("removeCost: found key");
750        if (it == this->end()) return;
751        typename DClass<D>::NodeD node = it->second;
752        NodeData d = node.get_data();
753        madness::print("removeCost: got data");
754        d.subcost -= c;
755        if (key.level() > 0) {
756            this->template removeCost(key.parent(), c);
757        }
758        madness::print("removeCost: before setting, data =", d);
759        node.set_data(d);
760        madness::print("removeCost: after setting, data =", node.get_data());
761        this->insert(key,node);
762        madness::print("removeCost: after inserting, data = ", node.get_data());
763=======
764template <int D, typename Pmap>
765void LBTree<D,Pmap>::removeCost(typename DClass<D>::KeyDConst& key, Cost c) {
766//madness::print("removeCost: key", key, "owner =", owner(key));
767//this->get_procmap().print();
768    if (((int) key.level()) < 0) return;
769    typename DClass<D>::treeT::iterator it = this->find(key);
770//madness::print("removeCost: found key");
771    if (it == this->end()) return;
772    typename DClass<D>::NodeD node = it->second;
773    NodeData d = node.get_data();
774//madness::print("removeCost: got data");
775    d.subcost -= c;
776    if (key.level() > 0) {
777    	this->removeCost(key.parent(), c);
778>>>>>>> .r223
779    }
780<<<<<<< .mine
781=======
782//madness::print("removeCost: before setting, data =", d);
783    node.set_data(d);
784//madness::print("removeCost: after setting, data =", node.get_data());
785    this->insert(key,node);
786//madness::print("removeCost: after inserting, data = ", node.get_data());
787}
788>>>>>>> .r223
789
790
791    Cost computePartitionSize(Cost cost, unsigned int parts) {
792        return (Cost) ceil(((double) cost)/((double) parts));
793    }
794
795
796    CompCost computeCompCost(Cost c, int n) {
797        CompCost compcost;
798        CompCost cfactor = 0.1, nfactor = 1.0;
799        compcost = cfactor*c + nfactor*n;
800        return compcost;
801    }
802
803
804    template <typename T, int D>
805    void migrate_data(SharedPtr<FunctionImpl<T,D> > tfrom, SharedPtr<FunctionImpl<T,D> > tto,
806                      typename DClass<D>::KeyD key) {
807        typename FunctionImpl<T,D>::iterator it = tfrom->find(key);
808        if (it == tfrom->end()) return;
809
810        FunctionNode<T,D> node = it->second;
811
812        if (node.has_children()) {
813            for (KeyChildIterator<D> kit(key); kit; ++kit) {
814                migrate_data<T,D>(tfrom, tto, kit.key());
815            }
816        }
817        tto->insert(key, node);
818    }
819
820
821<<<<<<< .mine
822    template <typename T, int D>
823    void migrate(SharedPtr<FunctionImpl<T,D> > tfrom, SharedPtr<FunctionImpl<T,D> > tto) {
824        typename DClass<D>::KeyD root(0);
825        print("migrate: at beginning");
826        migrate_data<T,D>(tfrom, tto, root);
827        print("migrate: at end");
828    }
829=======
830template <typename T, int D, typename Pmap>
831void migrate(SharedPtr<FunctionImpl<T,D,Pmap> > tfrom, SharedPtr<FunctionImpl<T,D,Pmap> > tto) {
832    typename DClass<D>::KeyD root(0);
833//print("migrate: at beginning");
834    migrate_data<T,D,Pmap>(tfrom, tto, root);
835//print("migrate: at end");
836}
837>>>>>>> .r223
838
839<<<<<<< .mine
840    // Explicit instantiations for D=1:6
841    template void migrate<double,3,MyPmap<3> >(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom,
842            SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto);
843=======
844// Explicit instantiations for D=1:6
845>>>>>>> .r223
846
847<<<<<<< .mine
848    template void migrate_data<double,3>(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom,
849                                         SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto, DClass<3>::KeyD key);
850=======
851>>>>>>> .r223
852
853<<<<<<< .mine
854
855// Who knows why these aren't cooperating, so commented out for now
856    /*
857    template void migrate_data<double,1>(SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tfrom,
858    	SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tto, DClass<1>::KeyD key);
859    template void migrate_data<double,2>(SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tfrom,
860    	SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tto, DClass<2>::KeyD key);
861    template void migrate_data<double,3>(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom,
862    	SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto, DClass<3>::KeyD key);
863    template void migrate_data<double,4>(SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tfrom,
864    	SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tto, DClass<4>::KeyD key);
865    template void migrate_data<double,5>(SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tfrom,
866    	SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tto, DClass<5>::KeyD key);
867    template void migrate_data<double,6>(SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tfrom,
868    	SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tto, DClass<6>::KeyD key);
869
870    template void migrate_data<std::complex<double>,1>(SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tfrom,
871    	SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tto, DClass<1>::KeyD key);
872    template void migrate_data<std::complex<double>,2>(SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tfrom,
873    	SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tto, DClass<2>::KeyD key);
874    template void migrate_data<std::complex<double>,3>(SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tfrom,
875    	SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tto, DClass<3>::KeyD key);
876    template void migrate_data<std::complex<double>,4>(SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tfrom,
877    	SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tto, DClass<4>::KeyD key);
878    template void migrate_data<std::complex<double>,5>(SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tfrom,
879    	SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tto, DClass<5>::KeyD key);
880    template void migrate_data<std::complex<double>,6>(SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tfrom,
881    	SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tto, DClass<6>::KeyD key);
882
883    template void migrate<double,1,MyPmap<1> >(SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tfrom,
884    	SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tto);
885    template void migrate<double,2,MyPmap<2> >(SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tfrom,
886    	SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tto);
887    template void migrate<double,3,MyPmap<3> >(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom,
888    	SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto);
889    template void migrate<double,4,MyPmap<4> >(SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tfrom,
890    	SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tto);
891    template void migrate<double,5,MyPmap<5> >(SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tfrom,
892    	SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tto);
893    template void migrate<double,6,MyPmap<6> >(SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tfrom,
894    	SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tto);
895
896    template void migrate<std::complex<double>,1,MyPmap<1> >(SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > tfrom,
897    	SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tto);
898    template void migrate<std::complex<double>,2,MyPmap<2> >(SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tfrom,
899    	SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tto);
900    template void migrate<std::complex<double>,3,MyPmap<3> >(SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tfrom,
901    	SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tto);
902    template void migrate<std::complex<double>,4,MyPmap<4> >(SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tfrom,
903    	SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tto);
904    template void migrate<std::complex<double>,5,MyPmap<5> >(SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tfrom,
905    	SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tto);
906    template void migrate<std::complex<double>,6,MyPmap<6> >(SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tfrom,
907    	SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tto);
908    */
909=======
910template void migrate_data<double,1>(SharedPtr<FunctionImpl<double,1,MyProcmap<1> > > tfrom,
911	SharedPtr<FunctionImpl<double,1,MyProcmap<1> > > tto, DClass<1>::KeyD key);
912template void migrate_data<double,2>(SharedPtr<FunctionImpl<double,2,MyProcmap<2> > > tfrom,
913	SharedPtr<FunctionImpl<double,2,MyProcmap<2> > > tto, DClass<2>::KeyD key);
914template void migrate_data<double,3>(SharedPtr<FunctionImpl<double,3,MyProcmap<3> > > tfrom,
915	SharedPtr<FunctionImpl<double,3,MyProcmap<3> > > tto, DClass<3>::KeyD key);
916template void migrate_data<double,4>(SharedPtr<FunctionImpl<double,4,MyProcmap<4> > > tfrom,
917	SharedPtr<FunctionImpl<double,4,MyProcmap<4> > > tto, DClass<4>::KeyD key);
918template void migrate_data<double,5>(SharedPtr<FunctionImpl<double,5,MyProcmap<5> > > tfrom,
919	SharedPtr<FunctionImpl<double,5,MyProcmap<5> > > tto, DClass<5>::KeyD key);
920template void migrate_data<double,6>(SharedPtr<FunctionImpl<double,6,MyProcmap<6> > > tfrom,
921	SharedPtr<FunctionImpl<double,6,MyProcmap<6> > > tto, DClass<6>::KeyD key);
922>>>>>>> .r223
923
924    template class LoadBalImpl<double,1,MyPmap<1> >;
925    template class LoadBalImpl<double,2,MyPmap<2> >;
926    template class LoadBalImpl<double,3,MyPmap<3> >;
927    template class LoadBalImpl<double,4,MyPmap<4> >;
928    template class LoadBalImpl<double,5,MyPmap<5> >;
929    template class LoadBalImpl<double,6,MyPmap<6> >;
930
931    template class LoadBalImpl<std::complex<double>,1,MyPmap<1> >;
932    template class LoadBalImpl<std::complex<double>,2,MyPmap<2> >;
933    template class LoadBalImpl<std::complex<double>,3,MyPmap<3> >;
934    template class LoadBalImpl<std::complex<double>,4,MyPmap<4> >;
935    template class LoadBalImpl<std::complex<double>,5,MyPmap<5> >;
936    template class LoadBalImpl<std::complex<double>,6,MyPmap<6> >;
937
938<<<<<<< .mine
939    template class LBTree<1,MyPmap<1> >;
940    template class LBTree<2,MyPmap<2> >;
941    template class LBTree<3,MyPmap<3> >;
942    template class LBTree<4,MyPmap<4> >;
943    template class LBTree<5,MyPmap<5> >;
944    template class LBTree<6,MyPmap<6> >;
945=======
946// Who knows why this isn't cooperating, so commented out for now
947/*
948template void migrate<std::complex<double>,1,MyProcmap<1> >(SharedPtr<FunctionImpl<std::complex<double>,1,MyProcmap<1> > tfrom,
949	SharedPtr<FunctionImpl<std::complex<double>,1,MyProcmap<1> > > tto);
950*/
951template void migrate<std::complex<double>,2,MyProcmap<2> >(SharedPtr<FunctionImpl<std::complex<double>,2,MyProcmap<2> > > tfrom,
952	SharedPtr<FunctionImpl<std::complex<double>,2,MyProcmap<2> > > tto);
953template void migrate<std::complex<double>,3,MyProcmap<3> >(SharedPtr<FunctionImpl<std::complex<double>,3,MyProcmap<3> > > tfrom,
954	SharedPtr<FunctionImpl<std::complex<double>,3,MyProcmap<3> > > tto);
955template void migrate<std::complex<double>,4,MyProcmap<4> >(SharedPtr<FunctionImpl<std::complex<double>,4,MyProcmap<4> > > tfrom,
956	SharedPtr<FunctionImpl<std::complex<double>,4,MyProcmap<4> > > tto);
957template void migrate<std::complex<double>,5,MyProcmap<5> >(SharedPtr<FunctionImpl<std::complex<double>,5,MyProcmap<5> > > tfrom,
958	SharedPtr<FunctionImpl<std::complex<double>,5,MyProcmap<5> > > tto);
959template void migrate<std::complex<double>,6,MyProcmap<6> >(SharedPtr<FunctionImpl<std::complex<double>,6,MyProcmap<6> > > tfrom,
960	SharedPtr<FunctionImpl<std::complex<double>,6,MyProcmap<6> > > tto);
961
962
963template class LoadBalImpl<double,1,MyProcmap<1> >;
964template class LoadBalImpl<double,2,MyProcmap<2> >;
965template class LoadBalImpl<double,3,MyProcmap<3> >;
966template class LoadBalImpl<double,4,MyProcmap<4> >;
967template class LoadBalImpl<double,5,MyProcmap<5> >;
968template class LoadBalImpl<double,6,MyProcmap<6> >;
969
970template class LoadBalImpl<std::complex<double>,1,MyProcmap<1> >;
971template class LoadBalImpl<std::complex<double>,2,MyProcmap<2> >;
972template class LoadBalImpl<std::complex<double>,3,MyProcmap<3> >;
973template class LoadBalImpl<std::complex<double>,4,MyProcmap<4> >;
974template class LoadBalImpl<std::complex<double>,5,MyProcmap<5> >;
975template class LoadBalImpl<std::complex<double>,6,MyProcmap<6> >;
976
977template class LBTree<1,MyProcmap<1> >;
978template class LBTree<2,MyProcmap<2> >;
979template class LBTree<3,MyProcmap<3> >;
980template class LBTree<4,MyProcmap<4> >;
981template class LBTree<5,MyProcmap<5> >;
982template class LBTree<6,MyProcmap<6> >;
983>>>>>>> .r223
984}
985//#define WORLD_INSTANTIATE_STATIC_TEMPLATES
986#ifndef LOADBAL_H
987#define LOADBAL_H
988
989#include <madness/world/MADworld.h>
990#include <madness/mra/key.h>
991#include <madness/mra/mra.h>
992using namespace std;
993
994namespace madness {
995
996    typedef int Cost;
997    typedef double CompCost;
998
999    inline int nearest_power(int me, int d) {
1000        int k = 0;
1001        while (me != 0) {
1002            if (me%d == 0) {
1003                k++;
1004                me/=d;
1005            } else {
1006                break;
1007            }
1008        }
1009        return k;
1010    };
1011
1012    template <typename Data, int D> class LBNode;
1013    template <int D> struct TreeCoords;
1014    template <int D> struct Tree;
1015    template <int D> class MyPmap;
1016    template <int D> class LBTree;
1017    class NodeData;
1018
1019    template <int D>
1020    struct DClass {
1021        typedef Key<D> KeyD;
1022        typedef const Key<D> KeyDConst;
1023        typedef TreeCoords<D> TreeCoords;
1024        typedef Tree<D> Tree;
1025        typedef LBNode<NodeData,D> NodeD;
1026        typedef const LBNode<NodeData,D> NodeDConst;
1027        typedef MyPmap<D> MyPmap;
1028        typedef LBTree<D> treeT;
1029    };
1030
1031    template <typename T, int D>
1032    void migrate(SharedPtr<FunctionImpl<T,D> > tfrom, SharedPtr<FunctionImpl<T,D> > tto);
1033
1034<<<<<<< .mine
1035    template <typename T, int D>
1036    void migrate_data(SharedPtr<FunctionImpl<T,D> > tfrom, SharedPtr<FunctionImpl<T,D> > tto,
1037                      typename DClass<D>::KeyD key);
1038=======
1039template <int D>
1040struct DClass {
1041    typedef Key<D> KeyD;
1042    typedef const Key<D> KeyDConst;
1043    typedef TreeCoords<D> TreeCoords;
1044    typedef Tree<D> Tree;
1045    typedef LBNode<NodeData,D> NodeD;
1046    typedef const LBNode<NodeData,D> NodeDConst;
1047    typedef MyProcmap<D> MyProcMap;
1048    typedef LBTree<D,MyProcMap> treeT;
1049};
1050>>>>>>> .r223
1051
1052    template <typename Data, int D>
1053    class LBNode {
1054    private:
1055        Data data;
1056        std::vector<bool> c;
1057
1058        void allchildren(bool status=false) {
1059            c.clear();
1060            c.assign(dim, status);
1061        };
1062
1063    public:
1064        static int dim;
1065
1066        LBNode() {
1067            data = Data();
1068            allchildren();
1069        };
1070
1071        LBNode(Data d, bool children=false) : data(d) {
1072            allchildren(children);
1073        };
1074
1075        bool has_children() const {
1076            for (int i = 0; i < dim; i++)
1077                if (c[i]) return true;
1078            return false;
1079        };
1080
1081        bool has_child(unsigned int i) const {
1082            return c[i];
1083        };
1084
1085        bool has_child(int i) const {
1086            return c[i];
1087        };
1088
1089        void set_child(int i, bool setto = true) {
1090            c[i] = setto;
1091        };
1092
1093        void set_data(Data d) {
1094            data = d;
1095        };
1096
1097        Data get_data() const {
1098            return data;
1099        };
1100
1101        vector<bool> get_c() const {
1102            return c;
1103        };
1104
1105        template <typename Archive>
1106        void serialize(const Archive& ar) {
1107            ar & data & c;
1108        }
1109    };
1110
1111
1112    template <typename Data, int D>
1113    std::ostream& operator<<(std::ostream& s, const LBNode<Data, D>& node) {
1114        s << "data = " << node.get_data() << ", c = " << node.get_c();
1115        return s;
1116    };
1117
1118    template <int D>
1119    std::ostream& operator<<(std::ostream& s, typename DClass<D>::NodeDConst& node) {
1120        s << "data = " << node.get_data() << ", c = " << node.get_c();
1121        return s;
1122    };
1123
1124
1125    template <typename Data, int D>
1126    int LBNode<Data,D>::dim = power<D>();
1127
1128
1129    class NodeData {
1130        friend std::ostream& operator<<(std::ostream& s, const NodeData& nd);
1131    public:
1132        int cost;
1133        int subcost;
1134        bool istaken;
1135        NodeData(int c = 1, int s = 1, bool i = false) : cost(c), subcost(s), istaken(i) {};
1136        template <typename Archive>
1137        void serialize(const Archive& ar) {
1138            ar & cost & subcost & istaken;
1139        };
1140        void print() {
1141            cout << "cost = " << cost << ", subcost = " << subcost << ", istaken = " << istaken << endl;
1142        };
1143    };
1144
1145
1146    inline std::ostream& operator<<(std::ostream& s, const NodeData& nd) {
1147        s << "cost " << nd.cost << ", subcost " << nd.subcost << ", istaken " << nd.istaken;
1148        return s;
1149    };
1150
1151
1152
1153    template <int D>
1154    struct TreeCoords {
1155        Key<D> key;
1156        ProcessID owner;
1157
1158        TreeCoords(const Key<D> k, ProcessID o) : key(Key<D>(k)), owner(o) {};
1159        TreeCoords(const TreeCoords& t) : key(Key<D>(t.key)), owner(t.owner) {};
1160        TreeCoords() : key(Key<D>()), owner(-1) {};
1161        void print() const {
1162            madness::print(key, "   owner =", owner);
1163        };
1164
1165        bool operator< (const TreeCoords t) const {
1166            return (this->key < t.key);
1167        };
1168    };
1169
1170
1171
1172    template <int D>
1173    struct Tree {
1174        TreeCoords<D> data;
1175        vector<SharedPtr<Tree> > children;
1176        Tree* parent;
1177
1178        Tree() {};
1179        Tree(TreeCoords<D> d) : data(d), parent(0) {};
1180        Tree(TreeCoords<D> d, Tree* p) : data(d), parent(p) {};
1181
1182        Tree(const Tree<D>& tree) : data(tree.data), parent(0) {};
1183        Tree(const Tree<D>& tree, Tree* p) : data(tree.data), parent(p) {};
1184
1185        Tree<D>& operator=(const Tree<D>& other) {
1186            if (this != &other) {
1187                this->data = other.data;
1188                this->parent = other.parent;
1189                this->children = other.children;
1190            }
1191            return *this;
1192        };
1193
1194        void insertChild(TreeCoords<D> d) {
1195            Tree* c = new Tree(d, this);
1196            children.insert(children.begin(),SharedPtr<Tree<D> > (c));
1197        };
1198
1199        void insertChild(const Tree<D>& tree) {
1200            Tree* c = new Tree(tree, this);
1201            children.insert(children.begin(),SharedPtr<Tree<D> > (c));
1202        };
1203
1204        void print() {
1205            data.print();
1206            int csize = children.size();
1207            for (int j = 0; j < csize; j++) {
1208                children[j]->print();
1209            }
1210        };
1211
1212        bool isForeparentOf(Key<D> key) const {
1213            return (this->data.key.is_parent_of(key));
1214        };
1215
1216        void findOwner(const Key<D> key, ProcessID *ow) const {
1217//madness::print("findOwner: at node", this->data.key);
1218            if (this->isForeparentOf(key)) {
1219//madness::print("findOwner: node", this->data.key, "is foreparent of", key, "so owner =", this->data.owner);
1220                *ow = this->data.owner;
1221                if (this->data.key.level() < key.level()) {
1222                    int csize = children.size();
1223                    for (int j = 0; j < csize; j++) {
1224//madness::print("findOwner: recursively call on ", this->children[j]->data.key);
1225                        children[j]->findOwner(key, ow);
1226                    }
1227                }
1228            }
1229        };
1230
1231        bool fill(TreeCoords<D> node) {
1232            bool success = false;
1233            if (this->isForeparentOf(node.key)) {
1234                int csize = children.size();
1235                for (int i = 0; i < csize; i++) {
1236                    if (children[i]->isForeparentOf(node.key)) {
1237                        success = children[i]->fill(node);
1238                    }
1239                }
1240                if (!success) {
1241                    this->insertChild(node);
1242                    success = true;
1243                }
1244            }
1245            return success;
1246        }
1247    };
1248
1249
1250
1251    template <int D>
1252    class MyPmap : public WorldDCPmapInterface< Key<D> > {
1253    private:
1254        bool static_map;
1255        const ProcessID staticmap_owner;
1256        Tree<D>* treeMap;
1257        typedef Key<D> KeyD;
1258
1259        void buildTreeMap(vector<TreeCoords<D> > v) {
1260            sort(v.begin(), v.end());
1261            int vlen = v.size();
1262
1263            if (vlen == 0) throw "empty map!!!";
1264
1265<<<<<<< .mine
1266            treeMap = new Tree<D>(v[vlen-1]);
1267            for (int j = vlen-2; j >= 0; j--) {
1268                treeMap->fill(v[j]);
1269            }
1270        };
1271=======
1272    ProcessID getOwner(const KeyD& key) const {
1273	ProcessID owner;
1274	treeMap->findOwner(key, &owner);
1275	return owner;
1276    };
1277>>>>>>> .r223
1278
1279
1280    public:
1281        MyPmap() : staticmap(false), staticmap_owner(0) {};
1282
1283        MyPmap(World& world) : staticmap(false), staticmap_owner(0) {
1284            int NP = world.nproc();
1285            int twotoD = power<D>();
1286            const int level = nearest_power(NP, twotoD);
1287            int NPin = (int) pow((double)twotoD,level);
1288            vector<TreeCoords<D> > v;
1289
1290<<<<<<< .mine
1291            for (Translation i=0; i < (Translation)NPin; i++) {
1292                KeyD key(level,i);
1293                if ((i%twotoD) == 0) {
1294                    key = key.parent(nearest_power(NPin-i, twotoD));
1295                }
1296                v.push_back(TreeCoords<D>(key,i));
1297            }
1298            buildTreeMap(v);
1299            madness::print("MyPmap constructor");
1300            treeMap->print();
1301        };
1302=======
1303	treeMap = new Tree<D>(v[vlen-1]);
1304	for (int j = vlen-2; j >= 0; j--) {
1305	    treeMap->fill(v[j]);
1306	}
1307    };
1308
1309>>>>>>> .r223
1310
1311        MyPmap(World& world, ProcessID owner) : staticmap(true), owner(owner) {};
1312
1313        MyPmap(World& world, vector<TreeCoords<D> > v) : staticmap(false), owner(1) { // owner????????????????
1314            buildTreeMap(v);
1315            madness::print("");
1316            treeMap->print();
1317        };
1318
1319        MyPmap(const MyPmap<D>& other) : staticmap(other.staticmap), owner(other.owner), treeMap(other.treeMap) {};
1320
1321        MyPmap<D>& operator=(const MyPmap<D>& other) {
1322            if (this != &other) {
1323                staticmap = other.staticmap;
1324                owner = other.owner;
1325                treeMap = other.treeMap;
1326            }
1327            return *this;
1328        };
1329
1330        void print() {
1331            treeMap->print();
1332        };
1333
1334        ProcessID Owner(const KeyD& key) const {
1335            if (staticmap)
1336                return staticmap_owner;
1337            else {
1338                ProcessID owner;
1339                treeMap->findOwner(key, &owner);
1340                return owner;
1341            }
1342        };
1343    };
1344
1345    template <int D>
1346    class LBTree : public WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD> {
1347        // No new variables necessary
1348    public:
1349<<<<<<< .mine
1350        typedef WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD> dcT;
1351        LBTree() {};
1352        LBTree(World& world, const SharedPtr< WorldDCPmapInterface<DClass<D>::KeyD> >& pmap) : dcT(world,pmap) {
1353            madness::print("LBTree(world, pmap) constructor");
1354            this->get_mypmap()->print();
1355            madness::print("LBTree(world, pmap) constructor (goodbye)");
1356        };
1357        template <typename T>
1358        inline void init_tree(SharedPtr< FunctionImpl<T,D> > f, typename DClass<D>::KeyDConst key) {
1359            // find Node associated with key
1360            typename FunctionImpl<T,D>::dcT::iterator it = f->coeffs.find(key);
1361            if (it == f->coeffs.end()) return;
1362            // convert Node to LBNode
1363            NodeData nd;
1364            if (!(it->second.has_children())) {
1365                typename DClass<D>::NodeD lbnode(nd,false);
1366                // insert into "this"
1367                this->insert(key, lbnode);
1368            } else {
1369                typename DClass<D>::NodeD lbnode(nd,true);
1370                // insert into "this"
1371                this->insert(key, lbnode);
1372                // then, call for each child
1373                for (KeyChildIterator<D> kit(key); kit; ++kit) {
1374                    this->init_tree<T>(f, kit.key());
1375                }
1376            }
1377        };
1378=======
1379	typedef WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD, Pmap> dcT;
1380
1381	LBTree() {};
1382	LBTree(World& world, const Pmap& pmap) : dcT(world,pmap) {
1383	    this->get_procmap().print();
1384	};
1385
1386	template <typename T>
1387	inline void init_tree(SharedPtr<FunctionImpl<T,D,Pmap> > f, typename DClass<D>::KeyDConst key) {
1388	    // find Node associated with key
1389	    typename FunctionImpl<T,D,Pmap>::iterator it = f->find(key);
1390	    if (it == f->end()) return;
1391	    // convert Node to LBNode
1392	    NodeData nd;
1393	    if (!(it->second.has_children())) {
1394		typename DClass<D>::NodeD lbnode(nd,false);
1395	        // insert into "this"
1396		this->insert(key, lbnode);
1397	    }
1398	    else {
1399		typename DClass<D>::NodeD lbnode(nd,true);
1400	        // insert into "this"
1401		this->insert(key, lbnode);
1402		// then, call for each child
1403		for (KeyChildIterator<D> kit(key); kit; ++kit) {
1404		    this->init_tree<T>(f, kit.key());
1405		}
1406	    }
1407	};
1408>>>>>>> .r223
1409
1410        // Methods:
1411        void print(typename DClass<D>::KeyDConst& key) {
1412            typename DClass<D>::treeT::iterator it = this->find(key);
1413            if (it == this->end()) return;
1414            for (Level i = 0; i < key.level(); i++) cout << "  ";
1415            madness::print(key, it->second);
1416            for (KeyChildIterator<D> kit(key); kit; ++kit) {
1417                print(kit.key());
1418            }
1419        };
1420
1421        Cost fixCost(typename DClass<D>::KeyDConst& key);
1422
1423        Cost depthFirstPartition(typename DClass<D>::KeyDConst& key,
1424                                 vector<typename DClass<D>::TreeCoords>* klist, unsigned int npieces,
1425                                 Cost totalcost = 0, Cost *maxcost = 0);
1426
1427        void rollup(typename DClass<D>::KeyDConst& key);
1428
1429        void meld(typename DClass<D>::KeyDConst& key);
1430
1431        Cost makePartition(typename DClass<D>::KeyDConst& key,
1432                           vector<typename DClass<D>::KeyD>* klist, Cost partitionSize,
1433                           bool lastPartition, Cost usedUp, bool *atleaf);
1434
1435        void removeCost(typename DClass<D>::KeyDConst& key, Cost c);
1436
1437        Cost computeCost(typename DClass<D>::KeyDConst& key);
1438
1439        // inherited methods
1440        typename WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD>::iterator
1441        end() {
1442            return WorldContainer<typename DClass<D>::KeyD, typename DClass<D>::NodeD>::end();
1443        };
1444
1445        typename WorldContainer<typename DClass<D>::KeyD,typename DClass<D>::NodeD>::iterator
1446        find(typename DClass<D>::KeyDConst& key) {
1447            return WorldContainer<typename DClass<D>::KeyD, typename DClass<D>::NodeD>::find(key);
1448        };
1449
1450//         const SharedPtr<WorldDCPmapInterface< DClass<D>::KeyD >& get_pmap() {
1451//             return WorldContainer<typename DClass<D>::KeyD, typename DClass<D>::NodeD>::get_pmap();
1452//         };
1453
1454        MyPmap<D>& get_mypmap() {
1455            return *static_cast< MyPmap<D>* >(get_pmap().get());
1456        };
1457
1458    };
1459
1460    template <typename T, int D, typename Pmap=MyPmap<D> >
1461    class LoadBalImpl {
1462    private:
1463        Function<T,D,Pmap> f;
1464        SharedPtr<typename DClass<D>::treeT> skeltree;
1465
1466<<<<<<< .mine
1467        void construct_skel(SharedPtr<FunctionImpl<T,D,Pmap> > f) {
1468            skeltree = SharedPtr<typename DClass<D>::treeT>(new typename DClass<D>::treeT(f->world,
1469                       f->coeffs.get_mypmap()));
1470            typename DClass<D>::KeyD root(0);
1471            madness::print("about to initialize tree");
1472            if (f->world.mpi.rank() == 0) {
1473                skeltree->template init_tree<T>(f,root);
1474            }
1475            madness::print("just initialized tree");
1476        };
1477=======
1478	void construct_skel(SharedPtr<FunctionImpl<T,D,Pmap> > f) {
1479	    skeltree = SharedPtr<typename DClass<D>::treeT>(new typename DClass<D>::treeT(f->world,
1480		f->get_procmap()));
1481	    typename DClass<D>::KeyD root(0);
1482	    if (f->world.mpi.rank() == 0) {
1483	    	skeltree->template init_tree<T>(f,root);
1484	    }
1485	};
1486>>>>>>> .r223
1487
1488    public:
1489<<<<<<< .mine
1490        //Constructors
1491        LoadBalImpl() {};
1492=======
1493	//Constructors
1494	LoadBalImpl() {};
1495	LoadBalImpl(Function<T,D,Pmap> f) : f(f) {
1496	    construct_skel(f.impl);
1497	};
1498	~LoadBalImpl() {};
1499>>>>>>> .r223
1500
1501        LoadBalImpl(Function<T,D> f) : f(f) {
1502            madness::print("LoadBalImpl (Function) constructor: f.impl", &f.impl);
1503            construct_skel(f.impl);
1504        };
1505
1506        ~LoadBalImpl() {};
1507
1508<<<<<<< .mine
1509        //Methods
1510        inline void loadBalance() {
1511            partition(findBestPartition());
1512        };
1513=======
1514	void partition(vector<typename DClass<D>::TreeCoords> v) {
1515	    // implement partition: copy to new FunctionImpl and replace within f
1516	    Pmap pmap(f.impl->world, v);
1517	    SharedPtr<FunctionImpl<T,D,Pmap> > newimpl(new FunctionImpl<T,D,Pmap>(*(f.impl.get()),pmap));
1518	    if (f.impl->world.mpi.rank() == 0) {
1519	    	madness::migrate<T,D,Pmap>(f.impl, newimpl);
1520		Key<D> root(0);
1521		newimpl->print(root);
1522	    }
1523	    f.impl->world.gop.fence();
1524	    f.impl = newimpl;
1525	};
1526>>>>>>> .r223
1527
1528        vector<typename DClass<D>::TreeCoords> findBestPartition();
1529
1530        void partition(vector<typename DClass<D>::TreeCoords> v) {
1531            // implement partition: copy to new FunctionImpl and replace within f
1532            madness::print("partition: at beginning");
1533            Pmap pmap(f.impl->world, v);
1534            SharedPtr<FunctionImpl<T,D,Pmap> > newimpl(new FunctionImpl<T,D>(*(f.impl.get()),pmap)); // ???????????????????????????????????
1535            if (f.impl->world.mpi.rank() == 0) {
1536                madness::migrate<T,D,Pmap>(f.impl, newimpl);
1537            }
1538            madness::print("partition: at fence");
1539            f.impl->world.gop.fence();
1540            madness::print("partition: after fence");
1541            f.impl = newimpl;
1542        };
1543
1544    };
1545
1546    CompCost computeCompCost(Cost c, int n);
1547
1548    Cost computePartitionSize(Cost cost, unsigned int parts);
1549
1550}
1551
1552#endif
1553
1554
1555    /// Simple distributed map for the tree
1556    template <int NDIM>
1557    class FunctionSimplePmap<NDIM> : public WorldDCPmapInterface< Key<NDIM> > {
1558    private:
1559        World* world;
1560        Level n;
1561
1562    public:
1563        FunctionSimplePmap() : world(0), n(2) {};
1564
1565        FunctionSimplePmap(
1566
1567
1568
1569
1570
1571
1572// Who knows why these aren't cooperating, so commented out for now
1573    /*
1574    template void migrate_data<double,1>(SharedPtr<FunctionImpl<double,1> > tfrom,
1575    	SharedPtr<FunctionImpl<double,1> > tto, DClass<1>::KeyD key);
1576    template void migrate_data<double,2>(SharedPtr<FunctionImpl<double,2> > tfrom,
1577    	SharedPtr<FunctionImpl<double,2> > tto, DClass<2>::KeyD key);
1578    template void migrate_data<double,3>(SharedPtr<FunctionImpl<double,3> > tfrom,
1579    	SharedPtr<FunctionImpl<double,3> > tto, DClass<3>::KeyD key);
1580    template void migrate_data<double,4>(SharedPtr<FunctionImpl<double,4> > tfrom,
1581    	SharedPtr<FunctionImpl<double,4> > tto, DClass<4>::KeyD key);
1582    template void migrate_data<double,5>(SharedPtr<FunctionImpl<double,5> > tfrom,
1583    	SharedPtr<FunctionImpl<double,5> > tto, DClass<5>::KeyD key);
1584    template void migrate_data<double,6>(SharedPtr<FunctionImpl<double,6> > tfrom,
1585    	SharedPtr<FunctionImpl<double,6> > tto, DClass<6>::KeyD key);
1586
1587    template void migrate_data<std::complex<double>,1>(SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tfrom,
1588    	SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tto, DClass<1>::KeyD key);
1589    template void migrate_data<std::complex<double>,2>(SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tfrom,
1590    	SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tto, DClass<2>::KeyD key);
1591    template void migrate_data<std::complex<double>,3>(SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tfrom,
1592    	SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tto, DClass<3>::KeyD key);
1593    template void migrate_data<std::complex<double>,4>(SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tfrom,
1594    	SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tto, DClass<4>::KeyD key);
1595    template void migrate_data<std::complex<double>,5>(SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tfrom,
1596    	SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tto, DClass<5>::KeyD key);
1597    template void migrate_data<std::complex<double>,6>(SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tfrom,
1598    	SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tto, DClass<6>::KeyD key);
1599
1600    template void migrate<double,1,MyPmap<1> >(SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tfrom,
1601    	SharedPtr<FunctionImpl<double,1,MyPmap<1> > > tto);
1602    template void migrate<double,2,MyPmap<2> >(SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tfrom,
1603    	SharedPtr<FunctionImpl<double,2,MyPmap<2> > > tto);
1604    template void migrate<double,3,MyPmap<3> >(SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tfrom,
1605    	SharedPtr<FunctionImpl<double,3,MyPmap<3> > > tto);
1606    template void migrate<double,4,MyPmap<4> >(SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tfrom,
1607    	SharedPtr<FunctionImpl<double,4,MyPmap<4> > > tto);
1608    template void migrate<double,5,MyPmap<5> >(SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tfrom,
1609    	SharedPtr<FunctionImpl<double,5,MyPmap<5> > > tto);
1610    template void migrate<double,6,MyPmap<6> >(SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tfrom,
1611    	SharedPtr<FunctionImpl<double,6,MyPmap<6> > > tto);
1612
1613    template void migrate<std::complex<double>,1,MyPmap<1> >(SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > tfrom,
1614    	SharedPtr<FunctionImpl<std::complex<double>,1,MyPmap<1> > > tto);
1615    template void migrate<std::complex<double>,2,MyPmap<2> >(SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tfrom,
1616    	SharedPtr<FunctionImpl<std::complex<double>,2,MyPmap<2> > > tto);
1617    template void migrate<std::complex<double>,3,MyPmap<3> >(SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tfrom,
1618    	SharedPtr<FunctionImpl<std::complex<double>,3,MyPmap<3> > > tto);
1619    template void migrate<std::complex<double>,4,MyPmap<4> >(SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tfrom,
1620    	SharedPtr<FunctionImpl<std::complex<double>,4,MyPmap<4> > > tto);
1621    template void migrate<std::complex<double>,5,MyPmap<5> >(SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tfrom,
1622    	SharedPtr<FunctionImpl<std::complex<double>,5,MyPmap<5> > > tto);
1623    template void migrate<std::complex<double>,6,MyPmap<6> >(SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tfrom,
1624    	SharedPtr<FunctionImpl<std::complex<double>,6,MyPmap<6> > > tto);
1625    */
1626
1627
1628
1629
1630
1631
1632template <typename T, int NDIM>
1633struct TestOp : WorldObject< TestOp<T,NDIM> > {
1634    typedef T resultT;
1635    typedef TestOp<T,NDIM> opT;
1636    const int k;
1637    std::vector<long> v2k;
1638
1639    TestOp(World& world, int k) : WorldObject<opT>(world), k(k), v2k(NDIM) {
1640        this->process_pending();
1641        for (int i=0; i<NDIM; i++) v2k[i] = 2*k;
1642    };
1643
1644
1645    double norm(const Key<NDIM>& key, const Displacement<NDIM>& d) const {
1646        if (d.distsq > 2) return 0.0;
1647        else return 1.0;
1648    }
1649
1650    Tensor<T> apply(const Key<NDIM>& key, const Displacement<NDIM>& d, const Tensor<T>& c) const {
1651        print("applying ", key, d);
1652        return Tensor<resultT>(v2k);
1653    }
1654};
1655
1656
1657
1658
1659namespace madness {
1660    namespace archive {
1661        template <class Archive, class T, int NDIM>
1662        struct ArchiveLoadImpl<Archive,const TestOp<T,NDIM>*> {
1663            static inline void load(const Archive& ar, const TestOp<T,NDIM>*& ptr) {
1664                WorldObject< TestOp<T,NDIM> >* p;
1665                ar & p;
1666                ptr = static_cast< const TestOp<T,NDIM>* >(p);
1667            }
1668        };
1669
1670        template <class Archive, class T, int NDIM>
1671        struct ArchiveStoreImpl<Archive,const TestOp<T,NDIM>*> {
1672            static inline void store(const Archive& ar, const TestOp<T,NDIM>*const& ptr) {
1673                ar & static_cast< const WorldObject< TestOp<T,NDIM> >* > (ptr);
1674            }
1675        };
1676    }
1677}
1678
1679
1680
1681    /// Holds info about displacement to neighbor for application of operators
1682    template <int NDIM>
1683    struct Displacement {
1684        Vector<Translation,NDIM> d;
1685        uint64_t distsq;
1686        Displacement() {};
1687        Displacement(const Vector<int, NDIM>& d) : d(d), distsq(0) {
1688            for (int i=0; i<NDIM; i++) distsq += d[i]*d[i];
1689        }
1690
1691        bool operator<(const Displacement<NDIM>& other) const {
1692            return distsq < other.distsq;
1693        }
1694
1695        Translation operator[](int i) const {return d[i];}
1696
1697        template <typename Archive>
1698        void serialize(Archive& ar) {
1699            ar & d & distsq;
1700        }
1701    };
1702
1703    template <int NDIM>
1704    std::ostream& operator<<(std::ostream& s, const Displacement<NDIM>& disp) {
1705        s << disp.d;
1706        return s;
1707    }
1708
1709
1710
1711    /// Simplified interface around hash_map to cache stuff
1712
1713    /// Since insertions into STL containers have the nasty habit of
1714    /// invalidating iterators we actually store shared pointers
1715    template <typename Q>
1716    class SimpleCache {
1717    private:
1718        typedef HASH_MAP_NAMESPACE::hash_map< unsigned long, SharedPtr<Q> > mapT;
1719        typedef std::pair< unsigned long, SharedPtr<Q> > pairT;
1720        mapT cache;
1721
1722        // Turns (n,lx) into key
1723        inline unsigned long key(Level n, long lx) const {
1724            return (n<<25) | (lx+16777216);
1725        }
1726
1727        // Turns (n,displacement) into key
1728        template <int NDIM>
1729        inline unsigned long key(Level n, const Key<NDIM>& d) const {
1730            MADNESS_ASSERT((6+NDIM*4) <= sizeof(unsigned long)*8);
1731            unsigned long k = n<<2;
1732            for (int i=0; i<NDIM; i++) k = (k<<4) | (d.translation()[i]+7);
1733            return k;
1734        }
1735
1736    public:
1737        SimpleCache() : cache() {};
1738
1739        SimpleCache(const SimpleCache& c) : cache(c.cache) {};
1740        SimpleCache& operator=(const SimpleCache& c) {
1741            if (this != &c) {
1742                cache.clear();
1743                cache = c.cache;
1744            }
1745            return *this;
1746        }
1747
1748        /// If (n,index) is present return pointer to cached value, otherwise return NULL
1749        template <typename indexT>
1750        inline const Q* getptr(Level n,  const indexT& index) const {
1751            typename mapT::const_iterator test = cache.find(key(n,index));
1752            if (test == cache.end()) return 0;
1753            return test->second.get();
1754        }
1755
1756
1757        /// Set value associated with (n,index)
1758        template <typename indexT>
1759        inline void set(Level n, const indexT& index, const Q& val) {
1760            cache.insert(pairT(key(n,index),SharedPtr<Q>(new Q(val))));
1761        }
1762    };
1763
1764
1765
1766
1767
1768# You must define additional rules to link your test programs
1769test: $(TEST1OBJ) $(THISLIBDEPEND)
1770	$(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS)
1771
1772testqm: $(TEST2OBJ) $(THISLIBDEPEND)
1773	$(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS)
1774
1775jjtests: jjtests.o $(THISLIBDEPEND)
1776	$(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS)
1777
1778jjtests.o:	sdc.h
1779
1780testsuite: $(TEST3OBJ) $(THISLIBDEPEND)
1781	$(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS)
1782
1783tests-hqi: $(TEST4OBJ) $(THISLIBDEPEND)
1784	$(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS)
1785
1786testperiodic: $(TEST5OBJ) $(THISLIBDEPEND)
1787	$(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS)
1788
1789gfit: $(TEST6OBJ) $(THISLIBDEPEND)
1790	$(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS)
1791
1792testbsh: $(TEST6OBJ) $(THISLIBDEPEND)
1793	$(LTLINK) $(LD) $(LDFLAGS) -o $@ $^ $(SYSLIBS) $(LTLINKBINOPTS)
1794
1795runvalg2:
1796	mpiexec -np 3 $(VALGRIND) $(VALGOPTS) ./tests-hqi -rio
1797
1798
1799
1800
1801    template <typename T, int NDIM>
1802    Void FunctionImpl<T,NDIM>::ensure_exists(const keyT& key) {
1803        PROFILE_MEMBER_FUNC(FunctionImpl);
1804        if (!coeffs.probe(key)) {
1805            keyT parent = key.parent();
1806            // If the node does not exist this implies that it will
1807            // be a leaf ... make it here so that we only send one
1808            // request up the tree to make it.
1809            coeffs.insert(key,nodeT(tensorT(),false));
1810            //madness::print("ensure_exists: sending recur up from", key, "to", parent);
1811            send(coeffs.owner(parent), &implT::recur_down_with_fill, key, parent);
1812        }
1813        return None;
1814    }
1815
1816
1817    template <typename T, int NDIM>
1818    void FunctionImpl<T,NDIM>::widen(bool fence, int ndiff) {
1819        PROFILE_MEMBER_FUNC(FunctionImpl);
1820        double tol = std::min(1e3*thresh, sqrt(thresh));
1821        for(typename dcT::iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
1822            const keyT& key = it->first;
1823            const nodeT& node = it->second;
1824            if (node.is_leaf() && node.coeff().normf()>tol) {
1825                for (int axis=0; axis<NDIM; axis++) {
1826                    for (int step=-1; step<=1; step+=2) {
1827                        keyT neigh = neighbor(key, axis, step);
1828                        if (neigh.is_valid()) {
1829                            if (ndiff > 0) neigh = neigh.parent(ndiff);
1830                            send(coeffs.owner(neigh), &implT::ensure_exists, neigh);
1831                        }
1832                    }
1833                }
1834            }
1835        }
1836        if (fence) world.gop.fence();
1837    }
1838
1839
1840        // Widens the support of the tree in preparation for integral operator
1841        void widen(bool fence, int ndiff);
1842
1843
1844        void widen(bool fence = true, int ndiff = 1) {
1845            PROFILE_MEMBER_FUNC(Function);
1846            verify();
1847            if (is_compressed()) reconstruct();
1848            impl->widen(fence, ndiff);
1849            if (fence && VERIFY_TREE) verify_tree();
1850        }
1851
1852
1853
1854
1855        template <typename testT>
1856        void conditional_refine_doit(const testT& test, const keyT& key) {
1857          nodeT& node = coeffs[key];
1858          if (node.has_coeff() && test(key, node.coeff())) {
1859            tensorT s(cdata.v2k);
1860            s(cdata.s0) = node.coeff();
1861            s = unfilter(s);
1862            node.clear_coeff();
1863            node.set_has_children(true);
1864            for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
1865               const keyT& child = kit.key();
1866               task(coeffs.owner(child), &implT:: template conditional_refine_insert_doit<testT>,
1867                    test, child, copy(s(child_patch(child))));
1868            }
1869          }
1870        }
1871
1872        template <typename testT>
1873        void conditional_refine(const testT& test, bool fence) {
1874          MADNESS_ASSERT(!compressed);
1875          for(typename dcT::iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
1876            const keyT& key = it->first;
1877            conditional_refine_doit(test, key);
1878          }
1879          if (fence) world.gop.fence();
1880        }
1881
1882
1883
1884        template <typename testT>
1885        void conditional_refine(const testT& test, bool fence=true) {
1886          if (is_compressed()) reconstruct();
1887          impl->conditional_refine(test, fence);
1888        }
1889
1890
1891
1892
1893        template <typename opT, typename R>
1894        Void do_apply_acc(const opT* op, const FunctionImpl<R,NDIM>* f, const keyT& key, const Tensor<T>& t) {
1895            PROFILE_MEMBER_FUNC(FunctionImpl);
1896            if (!coeffs.probe(key)) coeffs.replace(key, nodeT());
1897            nodeT& node = coeffs[key];
1898
1899            // Accumulate into the box
1900            if (node.has_coeff()) {
1901                node.coeff().gaxpy(1.0,t,1.0);
1902            }
1903            else {
1904                node.set_coeff(copy(t));
1905                // No existing coeff and no children means the node is newly created for
1906                // this operation and we must tell its parent that it exists.
1907                if ((!node.has_children()) && (key.level() > 0)) {
1908                    Key<NDIM> parent = key.parent();
1909                    coeffs.send(parent, &FunctionNode<T,NDIM>::set_has_children_recursive, coeffs, parent);
1910                }
1911
1912                if (op->dowiden1) {
1913                    typename FunctionImpl<R,NDIM>::dcT::const_iterator it = f->coeffs.find(key);
1914                    if ((it==f->coeffs.end() || it->second.is_invalid()) &&
1915                        (t.normf() > truncate_tol(thresh,key))) {
1916                        // We just made the first contribution to box that did not
1917                        // exist in the source.  Make the source box with any
1918                        // missing parents and apply the operator to each of them.
1919                        recur_down_with_apply(op, f, key.parent(), key, Tensor<R>());
1920                    }
1921                }
1922
1923            }
1924            return None;
1925        }
1926
1927
1928
1929        // This routine MUST be executed in an AM handler for atomicity
1930        template <typename opT, typename R>
1931        Void recur_down_with_apply(const opT* op,
1932                                   const FunctionImpl<R,NDIM>* cf,
1933                                   const keyT& key,
1934                                   const keyT& target,
1935                                   const Tensor<R>& r) {
1936
1937            PROFILE_MEMBER_FUNC(FunctionImpl);
1938            // We send the coeffs down in this routine so we have effectively
1939            // atomic insert+apply to eliminate a race condition leading to
1940            // double application of the operator.
1941
1942            FunctionImpl<R,NDIM>* f = const_cast<FunctionImpl<R,NDIM>*>(cf);
1943
1944            // <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<  race condition??????
1945
1946            if (!f->coeffs.probe(key)) {
1947                //madness::print("not there", key);
1948                f->coeffs.replace(key,FunctionNode<R,NDIM>());
1949            }
1950            FunctionNode<R,NDIM>& node = f->coeffs[key];
1951
1952            if (r.size) {
1953                // If r has data then it will be the coefficients for this node.
1954                // If we don't already have coeffs courtesy of someone else then
1955                // insert them and apply the operator.
1956                if (!node.has_coeff()) {
1957                    MADNESS_ASSERT(r.iscontiguous());
1958                    node.set_coeff(r);
1959                    //madness::print("EXTENDED APPLY", key, node.coeff().normf());
1960                    task(world.rank(), &implT:: template do_apply<opT,R>, op, cf, key, node.coeff());
1961                    if (key.level() == target.level()) return None; // Mission accomplished!
1962                    if (!target.is_child_of(key)) return None; // This is a sibling of the correct path
1963                }
1964            }
1965
1966            // If r does not have data or we are not yet at our target then we
1967            // must look at the node to see what to do
1968
1969            // - If key==target
1970            //   The coeffs should already have been made (and the operator applied)
1971            //   while someone else was making another node.
1972            //
1973            // - Otherwise
1974            // a) Node does not exist ... forward up.  Accessing the node in the manner
1975            //    above would have made an invalid node ... so this is captured by b)
1976            // b) Node exists but is invalid ... forward up (this means that someone else
1977            //    is already trying to make this node ... better would be to attach
1978            //    a callback so that when the coeffs are set this task is initiated).
1979            // c) Node exists and has children ... forward down
1980            // d) Node exists and has no children  ... recur down
1981
1982            Tensor<R> empty;
1983
1984            if (node.has_coeff()) { // d ... recur down if appropriate
1985                if (key.level() < target.level() && target.is_child_of(key)) {
1986                    const Tensor<R>& r = node.coeff();
1987                    Tensor<R> s;
1988                    if (r.dim[0] == k) {
1989                        Tensor<R> d(cdata.v2k);
1990                        d(cdata.s0) = node.coeff()(cdata.s0);
1991                        s = unfilter(d);
1992                    }
1993                    else if (r.dim[0] == 2*k) {
1994                        s = unfilter(node.coeff());
1995                    }
1996                    else {
1997                        MADNESS_EXCEPTION("Uh?",r.dim[0]);
1998                    }
1999
2000                    node.set_has_children(true);
2001                    for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2002                        const keyT& child = kit.key();
2003                        Tensor<R> ss = copy(s(child_patch(child)));
2004                        //madness::print("EXTENDED DOWN-2", key, "to", child, ss.normf());
2005                        task(coeffs.owner(child), &implT:: template recur_down_with_apply<opT,R>,
2006                             op, cf, child, target, ss);
2007                    }
2008                }
2009            }
2010            else { // a and b ... forward up
2011                keyT parent = key.parent();
2012                //madness::print("EXTENDED UP", key, "to", parent);
2013                task(coeffs.owner(parent), &implT:: template recur_down_with_apply<opT,R>, op, cf, parent, target, empty);
2014            }
2015
2016            return None;
2017        }
2018
2019
2020
2021
2022
2023
2024
2025        /// Apply one of the separated terms, accumulating into the result
2026
2027        /// !!! Keep this routine exactly consistent with muopxvT so that
2028        /// munorm converges correctly
2029        template <typename T>
2030        void muopxv(Level n,
2031                    const ConvolutionData1D<Q>* const ops[NDIM],
2032                    const Tensor<T>& f, const Tensor<T>& f0,
2033                    Tensor<TENSOR_RESULT_TYPE(T,Q)>& result,
2034                    const double tol,
2035                    const double musign) const {
2036            PROFILE_MEMBER_FUNC(SeparatedConvolution);
2037
2038            double Rnorm = 1.0;
2039            for (int d=0; d<NDIM; d++) Rnorm *= ops[d]->Rnorm;
2040            if (Rnorm == 0.0) return;
2041
2042            // Temporaries can be optimized away
2043            Tensor<TENSOR_RESULT_TYPE(T,Q)> tmp = inner(f,ops[0]->R,0,0);
2044            for (int d=1; d<NDIM; d++) {
2045                tmp = inner(tmp,ops[d]->R,0,0);
2046            }
2047            result.gaxpy(1.0,tmp,musign);
2048
2049            if (n > 0) {
2050                tmp = inner(f0,ops[0]->T,0,0);
2051                for (int d=1; d<NDIM; d++) {
2052                    tmp = inner(tmp,ops[d]->T,0,0);
2053                }
2054                result(s0).gaxpy(1.0,tmp,-musign);
2055            }
2056        }
2057
2058        /// Apply transpose of one of the separated terms, accumulating into the result
2059
2060        /// This is only used when computing the actual 2-norm by the power method
2061        /// !!! Keep this routine exactly consistent with muopxv so that
2062        /// munorm converges correctly
2063        template <typename T>
2064        void muopxvT(Level n,
2065                     const ConvolutionData1D<Q>* ops[],
2066                     const Tensor<T>& f, const Tensor<T>& f0,
2067                     Tensor<TENSOR_RESULT_TYPE(T,Q)>& result,
2068                     const double tol,
2069                     const double musign) const {
2070            PROFILE_MEMBER_FUNC(SeparatedConvolution);
2071
2072            double Rnorm = 1.0;
2073            for (int d=0; d<NDIM; d++) Rnorm *= ops[d]->Rnorm;
2074            if (Rnorm == 0.0) return;
2075
2076            // Temporaries can be optimized away
2077            Tensor<TENSOR_RESULT_TYPE(T,Q)> tmp = inner(f,ops[0]->R,0,1);
2078            for (int d=1; d<NDIM; d++) {
2079                tmp = inner(tmp,ops[d]->R,0,1);
2080            }
2081            result.gaxpy(1.0,tmp,musign);
2082
2083            if (n > 0) {
2084                tmp = inner(f0,ops[0]->T,0,1); // Slice+copy can be optimized away
2085                for (int d=1; d<NDIM; d++) {
2086                    tmp = inner(tmp,ops[d]->T,0,1);
2087                }
2088                result(s0).gaxpy(1.0,tmp,-musign);
2089            }
2090        }
2091
2092
2093        /// Computes the 2-norm of one of the separated terms
2094        double munorm(Level n, const ConvolutionData1D<Q>* ops[]) const {
2095            PROFILE_MEMBER_FUNC(SeparatedConvolution);
2096            Tensor<Q> f(v2k), f0, ff(v2k);
2097
2098            double tol = 1e-20;
2099
2100            f.fillrandom();
2101            f.scale(1.0/f.normf());
2102            double evalp = 1.0, eval, ratio=99.0;
2103            for (int iter=0; iter<100; iter++) {
2104                ff.fill(0.0);
2105                f0 = copy(f(s0));
2106                muopxv(n,ops,f,f0,ff,tol,1.0);
2107                f.fill(0.0);
2108                f0 = copy(ff(s0));
2109                muopxvT(n,ops,ff,f0,f,tol,1.0);
2110
2111                eval = f.normf();
2112                if (eval == 0.0) break;
2113                f.scale(1.0/eval);
2114                eval = sqrt(eval);
2115                ratio = eval/evalp;
2116                //std::printf("munorm: %d %10.2e %10.2e %10.2e \n", iter, eval, evalp, ratio);
2117                if (iter>0 && ratio<1.2 && ratio>0.9999) break; // 1.2 was 1.02;  >0.9999 was >=1.0
2118                if (iter>10 && eval<tol) break;
2119                evalp = eval;
2120                if (iter == 99) throw "munorm failed";
2121            }
2122            return eval*ratio;
2123        }
2124
2125
2126
2127
2128
2129
2130        /// Invoked by result to compute the pointwise product result=left*right
2131
2132        /// This version requires all three functions have the same distribution.
2133        /// Should be straightforward to do an efficient version that does not
2134        /// require this but I have not thought about that yet.
2135        ///
2136        /// Possible non-blocking communication and optional fence.
2137        template <typename L, typename R>
2138        void mul(const FunctionImpl<L,NDIM>& left, const FunctionImpl<R,NDIM>& right, bool fence) {
2139            PROFILE_MEMBER_FUNC(FunctionImpl);
2140            typedef std::pair< keyT,Tensor<R> > rpairT;
2141            typedef std::pair< keyT,Tensor<L> > lpairT;
2142            MADNESS_ASSERT(coeffs.get_pmap() == left.coeffs.get_pmap() && \
2143                           coeffs.get_pmap() == right.coeffs.get_pmap());
2144            // The three possibilities for the relative position of
2145            // the left and right coefficients in the tree are:
2146            //
2147            // 1.  left==right
2148            // 2.  left>right
2149            // 3.  left<right
2150            //
2151            // First loop thru local coeff in left.  Handle right at the same level or above.
2152	    for (typename FunctionImpl<L,NDIM>::dcT::const_iterator it=left.coeffs.begin();
2153                it != left.coeffs.end();
2154                ++it) {
2155                const keyT& key = it->first;
2156                const FunctionNode<L,NDIM>& left_node = it->second;
2157
2158                if (left_node.has_coeff()) {
2159                    if (right.coeffs.probe(key)) {
2160                        const FunctionNode<R,NDIM>& right_node = right.coeffs.find(key).get()->second;
2161                        if (right_node.has_coeff()) {
2162                            task(world.rank(), &implT:: template do_mul<L,R>, key, left_node.coeff(),
2163                                 rpairT(key,right_node.coeff()));  // Case 1.
2164                        }
2165                    }
2166                    else { // If right node does not exist then it must be further up the tree
2167                        const keyT parent = key.parent();
2168                        Future<rpairT> arg;
2169                        right.task(coeffs.owner(parent), &FunctionImpl<R,NDIM>::sock_it_to_me,
2170                                   parent, arg.remote_ref(world), TaskAttributes::hipri());
2171                        task(world.rank(), &implT:: template do_mul<L,R>, key, left_node.coeff(), arg); // Case 2.
2172                    }
2173                }
2174                else if (!coeffs.probe(key)) {
2175                    // Interior node
2176                    coeffs.replace(key,nodeT(tensorT(),true));
2177                }
2178
2179            }
2180
2181            // Now loop thru local coeff in right and do case 3.
2182	    for (typename FunctionImpl<R,NDIM>::dcT::const_iterator it=right.coeffs.begin();
2183                it != right.coeffs.end();
2184                ++it) {
2185                const keyT& key = it->first;
2186                const FunctionNode<R,NDIM>& right_node = it->second;
2187                if (right_node.has_coeff()) {
2188                    if (!left.coeffs.probe(key)) {
2189                        Future<lpairT> arg;
2190                        const keyT& parent = key.parent();
2191                        left.task(coeffs.owner(parent), &FunctionImpl<L,NDIM>::sock_it_to_me,
2192                                  parent, arg.remote_ref(world), TaskAttributes::hipri());
2193                        task(world.rank(), &implT:: template do_mul<R,L>, key, right_node.coeff(), arg); // Case 3.
2194                    }
2195                }
2196                else if (!coeffs.probe(key)) {
2197                    // Interior node
2198                    coeffs.replace(key,nodeT(tensorT(),true));
2199                }
2200
2201            }
2202            if (fence) world.gop.fence();
2203        }
2204
2205
2206
2207        template <typename L, typename R>
2208        Void do_mul_sparse2(const keyT& key,
2209                            const std::pair< keyT,Tensor<L> >& larg,
2210                            const std::pair< keyT,Tensor<R> >& rarg,
2211                            const FunctionImpl<R,NDIM>* right) {
2212            PROFILE_MEMBER_FUNC(FunctionImpl);
2213
2214            if (rarg.second.size > 0) {
2215                if (larg.first == key) {
2216                    //madness::print("L*R",key,larg.first,larg.second.size,rarg.first,rarg.second.size);
2217                    do_mul(key, larg.second, rarg);
2218                }
2219                else {
2220                    //madness::print("R*L",key,larg.first,larg.second.size,rarg.first,rarg.second.size);
2221                    do_mul(key, rarg.second, larg);
2222                }
2223            }
2224            else {
2225                coeffs.replace(key, nodeT(tensorT(), true));  // Insert interior node
2226                for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2227                    typedef std::pair< keyT,Tensor<R> > rpairT;
2228                    Future<rpairT> rarg;
2229                    right->task(coeffs.owner(kit.key()), &FunctionImpl<R,NDIM>::sock_it_to_me,
2230                                kit.key(), rarg.remote_ref(world), TaskAttributes::hipri());
2231
2232
2233                    task(world.rank(), &implT:: template do_mul_sparse2<L,R>,
2234                         kit.key(),larg, rarg, right);
2235                }
2236            }
2237            return None;
2238        }
2239
2240        template <typename L, typename R>
2241        Void do_mul_sparse(const Tensor<L>& left_coeff, const FunctionImpl<R,NDIM>* right, double tol,
2242                           const keyT& key, double right_norm) {
2243            PROFILE_MEMBER_FUNC(FunctionImpl);
2244            if (left_coeff.normf()*right_norm > truncate_tol(tol,key)) {
2245                typedef std::pair< keyT,Tensor<R> > rpairT;
2246                typedef std::pair< keyT,Tensor<L> > lpairT;
2247                Future<rpairT> rarg;
2248                right->task(coeffs.owner(key), &FunctionImpl<R,NDIM>::sock_it_to_me,
2249                            key, rarg.remote_ref(world), TaskAttributes::hipri());
2250                task(world.rank(), &implT:: template do_mul_sparse2<L,R>,
2251                     key ,lpairT(key,left_coeff), rarg, right);
2252            }
2253            else {
2254                coeffs.replace(key, nodeT(tensorT(cdata.vk), false));  // Result is zero
2255            }
2256            return None;
2257        }
2258
2259        template <typename L, typename R>
2260        void mul_sparse(const FunctionImpl<L,NDIM>& left, const FunctionImpl<R,NDIM>& right, double tol, bool fence) {
2261            // Loop thru leaf nodes in left
2262	    for (typename FunctionImpl<L,NDIM>::dcT::const_iterator it=left.coeffs.begin();
2263                it != left.coeffs.end();
2264                ++it) {
2265                const keyT& key = it->first;
2266                const FunctionNode<L,NDIM>& left_node = it->second;
2267
2268                if (left_node.is_leaf()) {
2269                    Future<double> rarg = right.task(right.coeffs.owner(key), &implT::get_norm_tree_recursive, key, TaskAttributes::hipri());
2270                    task(world.rank(), &implT:: template do_mul_sparse<L,R>, left_node.coeff(), &right, tol, key, rarg);
2271                }
2272                else {
2273                    coeffs.replace(key, nodeT(tensorT(), true));  // Insert interior node
2274                }
2275            }
2276            if (fence) world.gop.fence();
2277        }
2278
2279
2280
2281
2282
2283                    // reorder subspace in order of decreasing residual norm
2284                    for (int i=0; i<=m; i++) {
2285                        for (int j=0; j<i; j++) {
2286                            if (rnorms[i] > rnorms[j]) {
2287                                swap(rvec[i],rvec[j]);
2288                                swap(fvec[i],fvec[j]);
2289                                swap(rnorms[i],rnorms[j]);
2290
2291                                tensorT tmp;
2292                                tmp = copy(Q(i,_)); Q(i,_) = Q(j,_); Q(j,_) = tmp;
2293                                tmp = copy(Q(_,i)); Q(_,i) = Q(_,j); Q(_,j) = tmp;
2294                            }
2295                        }
2296                    }
2297                    print("reordered rnorms", rnorms);
2298
2299
2300
2301
2302
2303
2304
2305//                             tensorT result(cdata.vk);
2306//                             if (axis == 2) {
2307//                                 for (int p=0; p<k; p++) {
2308//                                     for (int q=0; q<k; q++) {
2309//                                         for (int r=0; r<k; r++) {
2310//                                             for (int s=0; s<k; s++) {
2311//                                                 result(p,q,r) += R(r,s)*c(p,q,s);
2312//                                             }
2313//                                         }
2314//                                     }
2315//                                 }
2316//                             }
2317//                             else if (axis == 1) {
2318//                                 for (int p=0; p<k; p++) {
2319//                                     for (int q=0; q<k; q++) {
2320//                                         for (int r=0; r<k; r++) {
2321//                                             for (int s=0; s<k; s++) {
2322//                                                 result(p,r,q) += R(r,s)*c(p,s,q);
2323//                                             }
2324//                                         }
2325//                                     }
2326//                                 }
2327//                             }
2328//                             else if (axis == 0) {
2329//                                 for (int p=0; p<k; p++) {
2330//                                     for (int q=0; q<k; q++) {
2331//                                         for (int r=0; r<k; r++) {
2332//                                             for (int s=0; s<k; s++) {
2333//                                                 result(r,p,q) += R(r,s)*c(s,p,q);
2334//                                             }
2335//                                         }
2336//                                     }
2337//                                 }
2338//                             }
2339
2340//                             if ((result - Result).normf() > 1e-10) {
2341//                                 print("BAD", key, axis, (result - Result).normf());
2342//                             }
2343
2344
2345
2346
2347
2348    /// 1D Gaussian convolution summed over periodic translations
2349
2350    /// r_periodic(n,l) = sum(R=-maxR,+maxR)[r_nonperiodic(n,l+R*2^n)]
2351    template <typename Q>
2352    class PeriodicGaussianConvolution1D : public Convolution1D<Q> {
2353    public:
2354
2355        const int k;
2356        const int maxR;
2357        GaussianConvolution1D<Q> g;
2358
2359        PeriodicGaussianConvolution1D(int k, int maxR, Q coeff, double expnt, double sign=1.0)
2360                : Convolution1D<Q>(k,k,1.0), k(k), maxR(maxR), g(k,coeff,expnt,sign) {}
2361
2362        virtual ~PeriodicGaussianConvolution1D() {}
2363
2364        Level natural_level() const {
2365            return g.natural_level();
2366        }
2367
2368        Tensor<Q> rnlp(Level n, Translation lx) const {
2369            Translation twon = Translation(1)<<n;
2370            Tensor<Q> r(2*k);
2371            for (int R=-maxR; R<=maxR; R++) {
2372                r.gaxpy(1.0, g.get_rnlp(n,R*twon+lx), 1.0);
2373            }
2374            return r;
2375        }
2376
2377        bool issmall(Level n, Translation lx) const {
2378            Translation twon = Translation(1)<<n;
2379            for (int R=-maxR; R<=maxR; R++) {
2380                if (!g.issmall(n, R*twon+lx)) return false;
2381            }
2382            return true;
2383        }
2384    };
2385/*
2386  This file is part of MADNESS.
2387
2388  Copyright (C) <2007> <Oak Ridge National Laboratory>
2389
2390  This program is free software; you can redistribute it and/or modify
2391  it under the terms of the GNU General Public License as published by
2392  the Free Software Foundation; either version 2 of the License, or
2393  (at your option) any later version.
2394
2395  This program is distributed in the hope that it will be useful,
2396  but WITHOUT ANY WARRANTY; without even the implied warranty of
2397  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2398  GNU General Public License for more details.
2399
2400  You should have received a copy of the GNU General Public License
2401  along with this program; if not, write to the Free Software
2402  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
2403
2404  For more information please contact:
2405
2406  Robert J. Harrison
2407  Oak Ridge National Laboratory
2408  One Bethel Valley Road
2409  P.O. Box 2008, MS-6367
2410
2411  email: harrisonrj@ornl.gov
2412  tel:   865-241-3937
2413  fax:   865-572-0680
2414
2415
2416  $Id$
2417*/
2418
2419
2420//#define WORLD_INSTANTIATE_STATIC_TEMPLATES
2421#include <madness/mra/mra.h>
2422#include <madness/world/worldhashmap.h>
2423
2424
2425extern "C" double round(double x);
2426
2427
2428/// \file mra.cc
2429/// \file Declaration and initialization of static data, some implementation, some instantiation
2430
2431namespace madness {
2432
2433    // Definition and initialization of FunctionDefaults static members
2434    // It cannot be an instance of FunctionFactory since we want to
2435    // set the defaults independent of the data type.
2436
2437    template <typename T, int NDIM>
2438    void FunctionCommonData<T,NDIM>::_make_dc_periodic() {
2439        // See ABGV for details
2440        r0 = Tensor<double>(k,k);
2441        rp = Tensor<double>(k,k);
2442        rm = Tensor<double>(k,k);
2443
2444        double iphase = 1.0;
2445        for (int i=0; i<k; i++) {
2446            double jphase = 1.0;
2447            for (int j=0; j<k; j++) {
2448                double gammaij = sqrt(double((2*i+1)*(2*j+1)));
2449                double Kij;
2450                if (((i-j)>0) && (((i-j)%2)==1))
2451                    Kij = 2.0;
2452                else
2453                    Kij = 0.0;
2454
2455                r0(i,j) = 0.5*(1.0 - iphase*jphase - 2.0*Kij)*gammaij;
2456                rm(i,j) = 0.5*jphase*gammaij;
2457                rp(i,j) =-0.5*iphase*gammaij;
2458                jphase = -jphase;
2459            }
2460            iphase = -iphase;
2461        }
2462
2463        // Make the rank-1 forms of rm and rp
2464        rm_left = Tensor<double>(k);
2465        rm_right = Tensor<double>(k);
2466        rp_left = Tensor<double>(k);
2467        rp_right = Tensor<double>(k);
2468
2469        iphase = 1.0;
2470        for (int i=0; i<k; i++) {
2471            double gamma = sqrt(0.5*(2*i+1));
2472            rm_left(i)  = rp_right(i) = gamma;
2473            rm_right(i) = rp_left(i)  = gamma*iphase;
2474            iphase *= -1.0;
2475        }
2476        rp_left.scale(-1.0);
2477
2478//         Tensor<double> rm_test = outer(rm_left,rm_right);
2479//         Tensor<double> rp_test = outer(rp_left,rp_right);
2480    }
2481
2482    template <typename T, int NDIM>
2483    void FunctionCommonData<T,NDIM>::_init_twoscale() {
2484        if (! two_scale_hg(k, &hg)) throw "failed to get twoscale coefficients";
2485        hgT = transpose(hg);
2486        hgsonly = copy(hg(Slice(0,k-1),_));
2487    }
2488
2489    template <typename T, int NDIM>
2490    void FunctionCommonData<T,NDIM>::_init_quadrature
2491    (int k, int npt, Tensor<double>& quad_x, Tensor<double>& quad_w,
2492     Tensor<double>& quad_phi, Tensor<double>& quad_phiw, Tensor<double>& quad_phit) {
2493        quad_x = Tensor<double>(npt);
2494        quad_w = Tensor<double>(npt);
2495        quad_phi = Tensor<double>(npt,k);
2496        quad_phiw = Tensor<double>(npt,k);
2497
2498        gauss_legendre(npt,0.0,1.0,quad_x.ptr(),quad_w.ptr());
2499        for (int mu=0; mu<npt; mu++) {
2500            double phi[200];
2501            legendre_scaling_functions(quad_x(mu),k,phi);
2502            for (int j=0; j<k; j++) {
2503                quad_phi(mu,j) = phi[j];
2504                quad_phiw(mu,j) = quad_w(mu)*phi[j];
2505            }
2506        }
2507        quad_phit = transpose(quad_phi);
2508    }
2509
2510
2511    template <typename T, int NDIM>
2512    void FunctionImpl<T,NDIM>::verify_tree() const {
2513        PROFILE_MEMBER_FUNC(FunctionImpl);
2514        world.gop.fence();  // Make sure nothing is going on
2515
2516        // Verify consistency of compression status, existence and size of coefficients,
2517        // and has_children() flag.
2518        for(typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
2519            const keyT& key = it->first;
2520            const nodeT& node = it->second;
2521            bool bad;
2522
2523            if (is_compressed()) {
2524                if (node.has_children()) {
2525                    bad = node.coeff().dim[0] != 2*cdata.k;
2526                }
2527                else {
2528                    bad = node.coeff().size != 0;
2529                }
2530            }
2531            else {
2532                if (node.has_children()) {
2533                    bad = node.coeff().size != 0;
2534                }
2535                else {
2536                    bad = node.coeff().dim[0] != cdata.k;
2537                }
2538            }
2539
2540            if (bad) {
2541                print(world.rank(), "FunctionImpl: verify: INCONSISTENT TREE NODE, key =", key, ", node =", node,
2542                      ", dim[0] =",node.coeff().dim[0],", compressed =",is_compressed());
2543                std::cout.flush();
2544                MADNESS_EXCEPTION("FunctionImpl: verify: INCONSISTENT TREE NODE", 0);
2545            }
2546        }
2547
2548        // Ensure that parents and children exist appropriately
2549        for(typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
2550            const keyT& key = it->first;
2551            const nodeT& node = it->second;
2552
2553            if (key.level() > 0) {
2554                const keyT parent = key.parent();
2555                typename dcT::const_iterator pit = coeffs.find(parent).get();
2556                if (pit == coeffs.end()) {
2557                    print(world.rank(), "FunctionImpl: verify: MISSING PARENT",key,parent);
2558                    std::cout.flush();
2559                    MADNESS_EXCEPTION("FunctionImpl: verify: MISSING PARENT", 0);
2560                }
2561                const nodeT& pnode = pit->second;
2562                if (!pnode.has_children()) {
2563                    print(world.rank(), "FunctionImpl: verify: PARENT THINKS IT HAS NO CHILDREN",key,parent);
2564                    std::cout.flush();
2565                    MADNESS_EXCEPTION("FunctionImpl: verify: PARENT THINKS IT HAS NO CHILDREN", 0);
2566                }
2567            }
2568
2569            for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2570                typename dcT::const_iterator cit = coeffs.find(kit.key()).get();
2571                if (cit == coeffs.end()) {
2572                    if (node.has_children()) {
2573                        print(world.rank(), "FunctionImpl: verify: MISSING CHILD",key,kit.key());
2574                        std::cout.flush();
2575                        MADNESS_EXCEPTION("FunctionImpl: verify: MISSING CHILD", 0);
2576                    }
2577                }
2578                else {
2579                    if (! node.has_children()) {
2580                        print(world.rank(), "FunctionImpl: verify: UNEXPECTED CHILD",key,kit.key());
2581                        std::cout.flush();
2582                        MADNESS_EXCEPTION("FunctionImpl: verify: UNEXPECTED CHILD", 0);
2583                    }
2584                }
2585            }
2586        }
2587
2588        world.gop.fence();
2589    }
2590
2591    template <typename T, int NDIM>
2592    T FunctionImpl<T,NDIM>::eval_cube(Level n, coordT x, const tensorT c) const {
2593        PROFILE_MEMBER_FUNC(FunctionImpl);
2594        const int k = cdata.k;
2595        double px[NDIM][k];
2596        T sum = T(0.0);
2597
2598        for (int i=0; i<NDIM; i++) legendre_scaling_functions(x[i],k,px[i]);
2599
2600        if (NDIM == 1) {
2601            for (int p=0; p<k; p++)
2602                sum += c(p)*px[0][p];
2603        }
2604        else if (NDIM == 2) {
2605            for (int p=0; p<k; p++)
2606                for (int q=0; q<k; q++)
2607                    sum += c(p,q)*px[0][p]*px[1][q];
2608        }
2609        else if (NDIM == 3) {
2610            for (int p=0; p<k; p++)
2611                for (int q=0; q<k; q++)
2612                    for (int r=0; r<k; r++)
2613                        sum += c(p,q,r)*px[0][p]*px[1][q]*px[2][r];
2614        }
2615        else if (NDIM == 4) {
2616            for (int p=0; p<k; p++)
2617                for (int q=0; q<k; q++)
2618                    for (int r=0; r<k; r++)
2619                        for (int s=0; s<k; s++)
2620                            sum += c(p,q,r,s)*px[0][p]*px[1][q]*px[2][r]*px[3][s];
2621        }
2622        else if (NDIM == 5) {
2623            for (int p=0; p<k; p++)
2624                for (int q=0; q<k; q++)
2625                    for (int r=0; r<k; r++)
2626                        for (int s=0; s<k; s++)
2627                            for (int t=0; t<k; t++)
2628                                sum += c(p,q,r,s,t)*px[0][p]*px[1][q]*px[2][r]*px[3][s]*px[4][t];
2629        }
2630        else if (NDIM == 6) {
2631            for (int p=0; p<k; p++)
2632                for (int q=0; q<k; q++)
2633                    for (int r=0; r<k; r++)
2634                        for (int s=0; s<k; s++)
2635                            for (int t=0; t<k; t++)
2636                                for (int u=0; u<k; u++)
2637                                    sum += c(p,q,r,s,t,u)*px[0][p]*px[1][q]*px[2][r]*px[3][s]*px[4][t]*px[5][u];
2638        }
2639        else {
2640            MADNESS_EXCEPTION("FunctionImpl:eval_cube:NDIM?",NDIM);
2641        }
2642        return sum*pow(2.0,0.5*NDIM*n)/sqrt(FunctionDefaults<NDIM>::get_cell_volume());
2643    }
2644
2645    template <typename T, int NDIM>
2646    Void FunctionImpl<T,NDIM>::reconstruct_op(const keyT& key, const tensorT& s) {
2647        PROFILE_MEMBER_FUNC(FunctionImpl);
2648        // Note that after application of an integral operator not all
2649        // siblings may be present so it is necessary to check existence
2650        // and if absent insert an empty leaf node.
2651        //
2652        // If summing the result of an integral operator (i.e., from
2653        // non-standard form) there will be significant scaling function
2654        // coefficients at all levels and possibly difference coefficients
2655        // in leaves, hence the tree may refine as a result.
2656        typename dcT::iterator it = coeffs.find(key).get();
2657        if (it == coeffs.end()) {
2658            coeffs.replace(key,nodeT(tensorT(),false));
2659            it = coeffs.find(key).get();
2660        }
2661        nodeT& node = it->second;
2662
2663        // The integral operator will correctly connect interior nodes
2664        // to children but may leave interior nodes without coefficients
2665        // ... but they still need to sum down so just give them zeros
2666        if (node.has_children() && !node.has_coeff()) {
2667            node.set_coeff(tensorT(cdata.v2k));
2668        }
2669
2670        if (node.has_children() || node.has_coeff()) { // Must allow for inconsistent state from transform, etc.
2671            tensorT d = node.coeff();
2672            if (d.size == 0) d = tensorT(cdata.v2k);
2673            if (key.level() > 0) d(cdata.s0) += s; // -- note accumulate for NS summation
2674            d = unfilter(d);
2675            node.clear_coeff();
2676            node.set_has_children(true);
2677            for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2678                const keyT& child = kit.key();
2679                tensorT ss = copy(d(child_patch(child)));
2680                PROFILE_BLOCK(recon_send);
2681                task(coeffs.owner(child), &implT::reconstruct_op, child, ss);
2682            }
2683        }
2684        else {
2685            if (key.level()) node.set_coeff(copy(s));
2686            else node.set_coeff(s);
2687        }
2688        return None;
2689    }
2690
2691
2692    template <typename T, int NDIM, typename FF>
2693    //~ void FunctionImpl<T,NDIM>::fcube(const keyT& key, const FunctionFunctorInterface<T,NDIM>& f, const Tensor<double>& qx, tensorT& fval) const {
2694    void FunctionImpl<T,NDIM>::fcube(const keyT& key, const FF& f, const Tensor<double>& qx, tensorT& fval) const {
2695        PROFILE_MEMBER_FUNC(FunctionImpl);
2696        const Vector<Translation,NDIM>& l = key.translation();
2697        const Level n = key.level();
2698        const double h = std::pow(0.5,double(n));
2699        coordT c; // will hold the point in user coordinates
2700        const int npt = qx.dim[0];
2701
2702        const Tensor<double>& cell_width = FunctionDefaults<NDIM>::get_cell_width();
2703        const Tensor<double>& cell = FunctionDefaults<NDIM>::get_cell();
2704
2705        if (NDIM == 1) {
2706            for (int i=0; i<npt; i++) {
2707                c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2708                fval(i) = f(c);
2709            }
2710        }
2711        else if (NDIM == 2) {
2712            for (int i=0; i<npt; i++) {
2713                c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2714                for (int j=0; j<npt; j++) {
2715                    c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2716                    fval(i,j) = f(c);
2717                }
2718            }
2719        }
2720        else if (NDIM == 3) {
2721            for (int i=0; i<npt; i++) {
2722                c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2723                for (int j=0; j<npt; j++) {
2724                    c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2725                    for (int k=0; k<npt; k++) {
2726                        c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2727                        fval(i,j,k) = f(c);
2728                    }
2729                }
2730            }
2731        }
2732        else if (NDIM == 4) {
2733            for (int i=0; i<npt; i++) {
2734                c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2735                for (int j=0; j<npt; j++) {
2736                    c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2737                    for (int k=0; k<npt; k++) {
2738                        c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2739                        for (int m=0; m<npt; m++) {
2740                            c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx
2741                            fval(i,j,k,m) = f(c);
2742                        }
2743                    }
2744                }
2745            }
2746        }
2747        else if (NDIM == 5) {
2748            for (int i=0; i<npt; i++) {
2749                c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2750                for (int j=0; j<npt; j++) {
2751                    c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2752                    for (int k=0; k<npt; k++) {
2753                        c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2754                        for (int m=0; m<npt; m++) {
2755                            c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx
2756                            for (int n=0; n<npt; n++) {
2757                                c[4] = cell(4,0) + h*cell_width[4]*(l[4] + qx(n)); // yy
2758                                fval(i,j,k,m,n) = f(c);
2759                            }
2760                        }
2761                    }
2762                }
2763            }
2764        }
2765        else if (NDIM == 6) {
2766            for (int i=0; i<npt; i++) {
2767                c[0] = cell(0,0) + h*cell_width[0]*(l[0] + qx(i)); // x
2768                for (int j=0; j<npt; j++) {
2769                    c[1] = cell(1,0) + h*cell_width[1]*(l[1] + qx(j)); // y
2770                    for (int k=0; k<npt; k++) {
2771                        c[2] = cell(2,0) + h*cell_width[2]*(l[2] + qx(k)); // z
2772                        for (int m=0; m<npt; m++) {
2773                            c[3] = cell(3,0) + h*cell_width[3]*(l[3] + qx(m)); // xx
2774                            for (int n=0; n<npt; n++) {
2775                                c[4] = cell(4,0) + h*cell_width[4]*(l[4] + qx(n)); // yy
2776                                for (int p=0; p<npt; p++) {
2777                                    c[5] = cell(5,0) + h*cell_width[5]*(l[5] + qx(p)); // zz
2778                                    fval(i,j,k,m,n,p) = f(c);
2779                                }
2780                            }
2781                        }
2782                    }
2783                }
2784            }
2785        }
2786        else {
2787            MADNESS_EXCEPTION("FunctionImpl: fcube: confused about NDIM?",NDIM);
2788        }
2789    }
2790
2791    template <typename T, int NDIM>
2792    Void FunctionImpl<T,NDIM>::project_refine_op(const keyT& key, bool do_refine) {
2793        PROFILE_MEMBER_FUNC(FunctionImpl);
2794        if (do_refine) {
2795            // Make in r child scaling function coeffs at level n+1
2796            tensorT r(cdata.v2k);
2797            for (KeyChildIterator<NDIM> it(key); it; ++it) {
2798                const keyT& child = it.key();
2799                r(child_patch(child)) = project(child);
2800            }
2801            // Filter then test difference coeffs at level n
2802            tensorT d = filter(r);
2803            tensorT s0;
2804            if (truncate_on_project) s0 = copy(d(cdata.s0));
2805            d(cdata.s0) = T(0);
2806
2807            if (d.normf()<truncate_tol(thresh,key.level()) || key.level()>=max_refine_level) {
2808                if (key.level()>=max_refine_level) print("MAX REFINE LEVEL",key);
2809                if (truncate_on_project) {
2810                    coeffs.replace(key,nodeT(s0,false));
2811                }
2812                else {
2813                    coeffs.replace(key,nodeT(tensorT(),true)); // Insert empty node for parent
2814                    for (KeyChildIterator<NDIM> it(key); it; ++it) {
2815                        const keyT& child = it.key();
2816                        coeffs.replace(child,nodeT(copy(r(child_patch(child))),false));
2817                    }
2818                }
2819            }
2820            else {
2821                coeffs.replace(key,nodeT(tensorT(),true)); // Insert empty node for parent
2822                for (KeyChildIterator<NDIM> it(key); it; ++it) {
2823                    const keyT& child = it.key();
2824                    ProcessID p;
2825                    if (FunctionDefaults<NDIM>::get_project_randomize()) {
2826                        p = world.random_proc();
2827                    }
2828                    else {
2829                        p = coeffs.owner(child);
2830                    }
2831                    PROFILE_BLOCK(proj_refine_send);
2832                    task(p, &implT::project_refine_op, child, do_refine); // ugh
2833                }
2834            }
2835        }
2836        else {
2837            coeffs.replace(key,nodeT(project(key),false));
2838        }
2839        return None;
2840    }
2841
2842    template <typename T, int NDIM>
2843    void FunctionImpl<T,NDIM>::add_scalar_inplace(T t, bool fence) {
2844        std::vector<long> v0(NDIM,0L);
2845        if (is_compressed()) {
2846            if (world.rank() == coeffs.owner(cdata.key0)) {
2847                typename dcT::iterator it = coeffs.find(cdata.key0).get();
2848                MADNESS_ASSERT(it != coeffs.end());
2849                nodeT& node = it->second;
2850                MADNESS_ASSERT(node.has_coeff());
2851                node.coeff()(v0) += t*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
2852            }
2853        }
2854        else {
2855            for(typename dcT::iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
2856                Level n = it->first.level();
2857                nodeT& node = it->second;
2858                if (node.has_coeff()) {
2859                    node.coeff()(v0) += t*sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*n)));
2860                }
2861            }
2862        }
2863        if (fence) world.gop.fence();
2864    }
2865
2866    template <typename T, int NDIM>
2867    void FunctionImpl<T,NDIM>::insert_zero_down_to_initial_level(const keyT& key) {
2868        PROFILE_MEMBER_FUNC(FunctionImpl);
2869        if (compressed) initial_level = std::max(initial_level,1); // Otherwise zero function is confused
2870        if (coeffs.is_local(key)) {
2871            if (compressed) {
2872                if (key.level() == initial_level) {
2873                    coeffs.replace(key, nodeT(tensorT(), false));
2874                }
2875                else {
2876                    coeffs.replace(key, nodeT(tensorT(cdata.v2k), true));
2877                }
2878            }
2879            else {
2880                if (key.level()<initial_level) {
2881                    coeffs.replace(key, nodeT(tensorT(), true));
2882                }
2883                else {
2884                    coeffs.replace(key, nodeT(tensorT(cdata.vk), false));
2885                }
2886            }
2887        }
2888        if (key.level() < initial_level) {
2889            for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2890                insert_zero_down_to_initial_level(kit.key());
2891            }
2892        }
2893
2894    }
2895
2896
2897    template <typename T, int NDIM>
2898    Future<bool> FunctionImpl<T,NDIM>::truncate_spawn(const keyT& key, double tol) {
2899        PROFILE_MEMBER_FUNC(FunctionImpl);
2900        typename dcT::iterator it = coeffs.find(key).get();
2901        if (it == coeffs.end()) {
2902            // In a standard tree all children would exist but some ops (transform)
2903            // can leave the tree in a messy state.  Just make the missing node as an
2904            // empty leaf.
2905            coeffs.replace(key,nodeT());
2906            it = coeffs.find(key).get();
2907        }
2908        nodeT& node = it->second;
2909        if (node.has_children()) {
2910            std::vector< Future<bool> > v = future_vector_factory<bool>(1<<NDIM);
2911            int i=0;
2912            for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
2913                v[i] = task(coeffs.owner(kit.key()), &implT::truncate_spawn, kit.key(), tol, TaskAttributes::generator());
2914            }
2915            return task(world.rank(),&implT::truncate_op, key, tol, v);
2916        }
2917        else {
2918            // In compressed form leaves should not have coeffs ... however the
2919            // transform op could leave the tree with leaves that do have coeffs
2920            // in which case we want something sensible to happen
2921            //MADNESS_ASSERT(!node.has_coeff());
2922            if (node.has_coeff() && key.level()>1) {
2923                double dnorm = node.coeff().normf();
2924                if (dnorm < truncate_tol(tol,key)) {
2925                    node.clear_coeff();
2926                }
2927            }
2928            return Future<bool>(node.has_coeff());
2929        }
2930    }
2931
2932
2933    template <typename T, int NDIM>
2934    bool FunctionImpl<T,NDIM>::truncate_op(const keyT& key, double tol, const std::vector< Future<bool> >& v) {
2935        PROFILE_MEMBER_FUNC(FunctionImpl);
2936        // If any child has coefficients, a parent cannot truncate
2937        for (int i=0; i<(1<<NDIM); i++) if (v[i].get()) return true;
2938        nodeT& node = coeffs.find(key).get()->second;
2939
2940        // Interior nodes should always have coeffs but transform might
2941        // leave empty interior nodes ... hence just force no coeffs to
2942        // be zero coeff unless it is a leaf.
2943        if (node.has_children() && !node.has_coeff()) node.set_coeff(tensorT(cdata.v2k));
2944
2945        if (key.level() > 1) { // >1 rather >0 otherwise reconstruct might get confused
2946            double dnorm = node.coeff().normf();
2947            if (dnorm < truncate_tol(tol,key)) {
2948                node.clear_coeff();
2949                if (node.has_children()) {
2950                    node.set_has_children(false);
2951                    for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2952                        coeffs.erase(kit.key());
2953                    }
2954                }
2955            }
2956        }
2957        return node.has_coeff();
2958    }
2959
2960
2961    template <typename T, int NDIM>
2962    void FunctionImpl<T,NDIM>::print_tree(Level maxlevel) const {
2963        if (world.rank() == 0) do_print_tree(cdata.key0, maxlevel);
2964        world.gop.fence();
2965        if (world.rank() == 0) std::cout.flush();
2966        world.gop.fence();
2967    }
2968
2969
2970    template <typename T, int NDIM>
2971    void FunctionImpl<T,NDIM>::do_print_tree(const keyT& key, Level maxlevel) const {
2972        typename dcT::const_iterator it = coeffs.find(key).get();
2973        if (it == coeffs.end()) {
2974            MADNESS_EXCEPTION("FunctionImpl: do_print_tree: null node pointer",0);
2975        }
2976        const nodeT& node = it->second;
2977        for (int i=0; i<key.level(); i++) std::cout << "  ";
2978        std::cout << key << "  " << node << " --> " << coeffs.owner(key) << "\n";
2979	for (int i=0; i<key.level(); i++) std::cout << "  ";
2980	std::cout << node.coeff() << "\n";
2981        if (key.level() < maxlevel  &&  node.has_children()) {
2982            for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
2983                do_print_tree(kit.key(),maxlevel);
2984            }
2985        }
2986    }
2987
2988    template <typename T, int NDIM>
2989    Tensor<T> FunctionImpl<T,NDIM>::project(const keyT& key) const {
2990        PROFILE_MEMBER_FUNC(FunctionImpl);
2991        MADNESS_ASSERT(cdata.npt == cdata.k); // only necessary due to use of fast transform
2992        tensorT fval(cdata.vq,false); // this will be the returned result
2993        tensorT work(cdata.vk,false); // initially evaluate the function in here
2994        tensorT workq(cdata.vq,false); // initially evaluate the function in here
2995
2996        if (functor) {
2997            fcube(key,*functor,cdata.quad_x,work);
2998        }
2999        else {
3000            MADNESS_EXCEPTION("FunctionImpl: project: confusion about function?",0);
3001        }
3002
3003        work.scale(sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*key.level()))));
3004        //return transform(work,cdata.quad_phiw);
3005        return fast_transform(work,cdata.quad_phiw,fval,workq);
3006    }
3007
3008    template <typename T, int NDIM>
3009    Future<double> FunctionImpl<T,NDIM>::get_norm_tree_recursive(const keyT& key) const {
3010        if (coeffs.probe(key)) {
3011            return Future<double>(coeffs.find(key).get()->second.get_norm_tree());
3012        }
3013        MADNESS_ASSERT(key.level());
3014        keyT parent = key.parent();
3015        return task(coeffs.owner(parent), &implT::get_norm_tree_recursive, parent, TaskAttributes::hipri());
3016    }
3017
3018
3019    template <typename T, int NDIM>
3020    Void FunctionImpl<T,NDIM>::sock_it_to_me(const keyT& key,
3021                                             const RemoteReference< FutureImpl< std::pair<keyT,tensorT> > >& ref) const {
3022        PROFILE_MEMBER_FUNC(FunctionImpl);
3023        if (coeffs.probe(key)) {
3024            const nodeT& node = coeffs.find(key).get()->second;
3025            Future< std::pair<keyT,tensorT> > result(ref);
3026            if (node.has_coeff()) {
3027                //madness::print("sock found it with coeff",key);
3028                result.set(std::pair<keyT,tensorT>(key,node.coeff()));
3029            }
3030            else {
3031                //madness::print("sock found it without coeff",key);
3032                result.set(std::pair<keyT,tensorT>(key,tensorT()));
3033            }
3034        }
3035        else {
3036            keyT parent = key.parent();
3037            //madness::print("sock forwarding to parent",key,parent);
3038            PROFILE_BLOCK(sitome_send);
3039            task(coeffs.owner(parent), &FunctionImpl<T,NDIM>::sock_it_to_me, parent, ref, TaskAttributes::hipri());
3040        }
3041        return None;
3042    }
3043
3044    template <typename T, int NDIM>
3045    Void FunctionImpl<T,NDIM>::eval(const Vector<double,NDIM>& xin,
3046                                    const keyT& keyin,
3047                                    const typename Future<T>::remote_refT& ref) {
3048
3049        PROFILE_MEMBER_FUNC(FunctionImpl);
3050        // This is ugly.  We must figure out a clean way to use
3051        // owner computes rule from the container.
3052        Vector<double,NDIM> x = xin;
3053        keyT key = keyin;
3054        Vector<Translation,NDIM> l = key.translation();
3055        ProcessID me = world.rank();
3056        while (1) {
3057            ProcessID owner = coeffs.owner(key);
3058            if (owner != me) {
3059                PROFILE_BLOCK(eval_send);
3060                task(owner, &implT::eval, x, key, ref, TaskAttributes::hipri());
3061                return None;
3062            }
3063            else {
3064                typename dcT::futureT fut = coeffs.find(key);
3065                typename dcT::iterator it = fut.get();
3066                nodeT& node = it->second;
3067                if (node.has_coeff()) {
3068                    Future<T>(ref).set(eval_cube(key.level(), x, node.coeff()));
3069                    return None;
3070                }
3071                else {
3072                    for (int i=0; i<NDIM; i++) {
3073                        double xi = x[i]*2.0;
3074                        int li = int(xi);
3075                        if (li == 2) li = 1;
3076                        x[i] = xi - li;
3077                        l[i] = 2*l[i] + li;
3078                    }
3079                    key = keyT(key.level()+1,l);
3080                }
3081            }
3082        }
3083        //MADNESS_EXCEPTION("should not be here",0);
3084    }
3085
3086    template <typename T, int NDIM>
3087    void FunctionImpl<T,NDIM>::tnorm(const tensorT& t, double* lo, double* hi) const {
3088        PROFILE_MEMBER_FUNC(FunctionImpl);
3089        // Chosen approach looks stupid but it is more accurate
3090        // than the simple approach of summing everything and
3091        // subtracting off the low-order stuff to get the high
3092        // order (assuming the high-order stuff is small relative
3093        // to the low-order)
3094        tensorT work = copy(t);
3095        tensorT tlo = work(cdata.sh);
3096        *lo = tlo.normf();
3097        tlo.fill(0.0);
3098        *hi = work.normf();
3099    }
3100
3101    namespace detail {
3102        template <typename A, typename B>
3103        struct noop {
3104            void operator()(const A& a, const B& b) const {};
3105
3106            template <typename Archive> void serialize(Archive& ar) {}
3107        };
3108
3109        template <typename T, int NDIM>
3110        struct scaleinplace {
3111            T q;
3112            scaleinplace() {}
3113            scaleinplace(T q) : q(q) {}
3114            void operator()(const Key<NDIM>& key, Tensor<T>& t) const {t.scale(q);}
3115            template <typename Archive> void serialize(Archive& ar) {ar & q;}
3116        };
3117
3118        template <typename T, int NDIM>
3119        struct squareinplace {
3120            void operator()(const Key<NDIM>& key, Tensor<T>& t) const {t.emul(t);}
3121            template <typename Archive> void serialize(Archive& ar) {}
3122        };
3123
3124        template <typename T, int NDIM>
3125        struct absinplace {
3126            void operator()(const Key<NDIM>& key, Tensor<T>& t) const {abs(t);}
3127            template <typename Archive> void serialize(Archive& ar) {}
3128        };
3129     }
3130
3131    template <typename T, int NDIM>
3132    void FunctionImpl<T,NDIM>::scale_inplace(const T q, bool fence)
3133    {
3134        unary_op_coeff_inplace(detail::scaleinplace<T,NDIM>(q), fence);
3135    }
3136
3137    template <typename T, int NDIM>
3138    void FunctionImpl<T,NDIM>::square_inplace(bool fence) {
3139        //unary_op_value_inplace(&implT::autorefine_square_test, detail::squareinplace<T,NDIM>(), fence);
3140        unary_op_value_inplace(detail::squareinplace<T,NDIM>(), fence);
3141    }
3142
3143    template <typename T, int NDIM>
3144    void FunctionImpl<T,NDIM>::phi_for_mul(Level np, Translation lp, Level nc, Translation lc, Tensor<double>& phi) const {
3145        PROFILE_MEMBER_FUNC(FunctionImpl);
3146        double p[200];
3147        double scale = pow(2.0,double(np-nc));
3148        for (int mu=0; mu<cdata.npt; mu++) {
3149            double xmu = scale*(cdata.quad_x(mu)+lc) - lp;
3150            MADNESS_ASSERT(xmu>-1e-15 && xmu<(1+1e-15));
3151            legendre_scaling_functions(xmu,cdata.k,p);
3152            for (int i=0; i<k; i++) phi(i,mu) = p[i];
3153        }
3154        phi.scale(pow(2.0,0.5*np));
3155    }
3156
3157    template <typename T, int NDIM>
3158    const Tensor<T> FunctionImpl<T,NDIM>::parent_to_child(const tensorT& s, const keyT& parent, const keyT& child) const {
3159        PROFILE_MEMBER_FUNC(FunctionImpl);
3160        // An invalid parent/child means that they are out of the box
3161        // and it is the responsibility of the caller to worry about that
3162        // ... most likely the coefficients (s) are zero to reflect
3163        // zero B.C. so returning s makes handling this easy.
3164        if (parent == child || parent.is_invalid() || child.is_invalid()) return s;
3165
3166        tensorT result = fcube_for_mul<T>(child, parent, s);
3167        result.scale(sqrt(FunctionDefaults<NDIM>::get_cell_volume()*pow(0.5,double(NDIM*child.level()))));
3168        result = transform(result,cdata.quad_phiw);
3169
3170        return result;
3171    }
3172
3173
3174    template <typename T, int NDIM>
3175    T FunctionImpl<T,NDIM>::trace_local() const {
3176        PROFILE_MEMBER_FUNC(FunctionImpl);
3177        std::vector<long> v0(NDIM,0);
3178        T sum = 0.0;
3179        if (compressed) {
3180            if (world.rank() == coeffs.owner(cdata.key0)) {
3181                typename dcT::const_iterator it = coeffs.find(cdata.key0).get();
3182                if (it != coeffs.end()) {
3183                    const nodeT& node = it->second;
3184                    if (node.has_coeff()) sum = node.coeff()(v0);
3185                }
3186            }
3187        }
3188        else {
3189            for(typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
3190                const keyT& key = it->first;
3191                const nodeT& node = it->second;
3192                if (node.has_coeff()) sum += node.coeff()(v0)*pow(0.5,NDIM*key.level()*0.5);
3193            }
3194        }
3195        return sum*sqrt(FunctionDefaults<NDIM>::get_cell_volume());
3196    }
3197
3198
3199    template <typename T, int NDIM>
3200    void FunctionImpl<T,NDIM>::diff(const implT& f, int axis, bool fence) {
3201        PROFILE_MEMBER_FUNC(FunctionImpl);
3202        typedef std::pair<keyT,tensorT> argT;
3203        for(typename dcT::const_iterator it=f.coeffs.begin(); it!=f.coeffs.end(); ++it) {
3204            const keyT& key = it->first;
3205            const nodeT& node = it->second;
3206            if (node.has_coeff()) {
3207                Future<argT> left = f.find_neighbor(key,axis,-1);
3208                argT center(key,node.coeff());
3209                Future<argT> right  = f.find_neighbor(key,axis, 1);
3210                task(world.rank(), &implT::do_diff1, &f, axis, key, left, center, right);
3211            }
3212            else {
3213                // Internal empty node can be safely inserted
3214                coeffs.replace(key,nodeT(tensorT(),true));
3215            }
3216        }
3217        if (fence) world.gop.fence();
3218    }
3219
3220    static bool enforce_bc(int bc_left, int bc_right, Level n, Translation& l) {
3221        Translation two2n = 1ul << n;
3222        if (l < 0) {
3223            if (bc_left == 0) {
3224                return false; // Zero BC
3225            }
3226            else if (bc_left == 1) {
3227                l += two2n; // Periodic BC
3228            }
3229            else {
3230                MADNESS_EXCEPTION("enforce_bc: confused left BC?",bc_left);
3231            }
3232        }
3233        else if (l >= two2n) {
3234            if (bc_right == 0) {
3235                return false; // Zero BC
3236            }
3237            else if (bc_right == 1) {
3238                l -= two2n; // Periodic BC
3239            }
3240            else {
3241                MADNESS_EXCEPTION("enforce_bc: confused BC right?",bc_left);
3242            }
3243        }
3244        return true;
3245    }
3246
3247
3248    template <typename T, int NDIM>
3249    Key<NDIM> FunctionImpl<T,NDIM>::neighbor(const keyT& key, int axis, int step) const {
3250        Vector<Translation,NDIM> l = key.translation();
3251
3252        l[axis] += step;
3253
3254        if (!enforce_bc(bc(axis,0), bc(axis,1), key.level(), l[axis])) {
3255            return keyT::invalid();
3256        }
3257        else {
3258            return keyT(key.level(),l);
3259        }
3260    }
3261
3262    template <typename T, int NDIM>
3263    Key<NDIM> FunctionImpl<T,NDIM>::neighbor(const keyT& key, const Key<NDIM>& disp) const {
3264        Vector<Translation,NDIM> l = key.translation();
3265
3266        for (int axis=0; axis<NDIM; axis++) {
3267            l[axis] += disp.translation()[axis];
3268
3269            if (!enforce_bc(bc(axis,0), bc(axis,1), key.level(), l[axis])) {
3270                return keyT::invalid();
3271            }
3272        }
3273        return keyT(key.level(),l);
3274    }
3275
3276    template <typename T, int NDIM>
3277    Future< std::pair< Key<NDIM>,Tensor<T> > >
3278    FunctionImpl<T,NDIM>::find_neighbor(const Key<NDIM>& key, int axis, int step) const {
3279        PROFILE_MEMBER_FUNC(FunctionImpl);
3280        typedef std::pair< Key<NDIM>,Tensor<T> > argT;
3281        keyT neigh = neighbor(key, axis, step);
3282        if (neigh.is_invalid()) {
3283            return Future<argT>(argT(neigh,tensorT(cdata.vk))); // Zero bc
3284        }
3285        else {
3286            Future<argT> result;
3287            PROFILE_BLOCK(find_neigh_send);
3288            task(coeffs.owner(neigh), &implT::sock_it_to_me, neigh, result.remote_ref(world), TaskAttributes::hipri());
3289            return result;
3290        }
3291    }
3292
3293    template <typename T, int NDIM>
3294    Void FunctionImpl<T,NDIM>::forward_do_diff1(const implT* f, int axis, const keyT& key,
3295                                                const std::pair<keyT,tensorT>& left,
3296                                                const std::pair<keyT,tensorT>& center,
3297                                                const std::pair<keyT,tensorT>& right) {
3298        PROFILE_MEMBER_FUNC(FunctionImpl);
3299        ProcessID owner = coeffs.owner(key);
3300        if (owner == world.rank()) {
3301            if (left.second.size == 0) {
3302                task(owner, &implT::do_diff1, f, axis, key, f->find_neighbor(key,axis,-1), center, right, TaskAttributes::hipri());
3303            }
3304            else if (right.second.size == 0) {
3305                task(owner, &implT::do_diff1, f, axis, key, left, center, f->find_neighbor(key,axis,1), TaskAttributes::hipri());
3306            }
3307            else {
3308                task(owner, &implT::do_diff2, f, axis, key, left, center, right);
3309            }
3310        }
3311        else {
3312            task(owner, &implT::forward_do_diff1, f, axis, key, left, center, right, TaskAttributes::hipri());
3313        }
3314        return None;
3315    }
3316
3317    template <typename T, int NDIM>
3318    Void FunctionImpl<T,NDIM>::do_diff1(const implT* f, int axis, const keyT& key,
3319                                        const std::pair<keyT,tensorT>& left,
3320                                        const std::pair<keyT,tensorT>& center,
3321                                        const std::pair<keyT,tensorT>& right) {
3322        PROFILE_MEMBER_FUNC(FunctionImpl);
3323        typedef std::pair<keyT,tensorT> argT;
3324
3325        MADNESS_ASSERT(axis>=0 && axis<NDIM);
3326
3327        if (left.second.size==0 || right.second.size==0) {
3328            // One of the neighbors is below us in the tree ... recur down
3329            coeffs.replace(key,nodeT(tensorT(),true));
3330            for (KeyChildIterator<NDIM> kit(key); kit; ++kit) {
3331                const keyT& child = kit.key();
3332                if ((child.translation()[axis]&1) == 0) {
3333                    // leftmost child automatically has right sibling
3334                    forward_do_diff1(f, axis, child, left, center, center);
3335                }
3336                else {
3337                    // rightmost child automatically has left sibling
3338                    forward_do_diff1(f, axis, child, center, center, right);
3339                }
3340            }
3341        }
3342        else {
3343            forward_do_diff1(f, axis, key, left, center, right);
3344        }
3345        return None;
3346    }
3347
3348    template <typename T, int NDIM>
3349    Void FunctionImpl<T,NDIM>::do_diff2(const implT* f, int axis, const keyT& key,
3350                                        const std::pair<keyT,tensorT>& left,
3351                                        const std::pair<keyT,tensorT>& center,
3352                                        const std::pair<keyT,tensorT>& right) {
3353        PROFILE_MEMBER_FUNC(FunctionImpl);
3354        typedef std::pair<keyT,tensorT> argT;
3355
3356        tensorT d = madness::inner(cdata.rp,
3357                                   parent_to_child(left.second, left.first, neighbor(key,axis,-1)).swapdim(axis,0),
3358                                   1, 0);
3359        inner_result(cdata.r0,
3360                     parent_to_child(center.second, center.first, key).swapdim(axis,0),
3361                     1, 0, d);
3362        inner_result(cdata.rm,
3363                     parent_to_child(right.second, right.first, neighbor(key,axis,1)).swapdim(axis,0),
3364                     1, 0, d);
3365        if (axis) d = copy(d.swapdim(axis,0)); // make it contiguous
3366        d.scale(FunctionDefaults<NDIM>::get_rcell_width()[axis]*pow(2.0,(double) key.level()));
3367        coeffs.replace(key,nodeT(d,false));
3368        return None;
3369    }
3370
3371    template <typename T, int NDIM>
3372    void FunctionImpl<T,NDIM>::mapdim(const implT& f, const std::vector<long>& map, bool fence) {
3373        PROFILE_MEMBER_FUNC(FunctionImpl);
3374        for(typename dcT::const_iterator it=f.coeffs.begin(); it!=f.coeffs.end(); ++it) {
3375            const keyT& key = it->first;
3376            const nodeT& node = it->second;
3377
3378            Vector<Translation,NDIM> l;
3379            for (int i=0; i<NDIM; i++) l[map[i]] = key.translation()[i];
3380            tensorT c = node.coeff();
3381            if (c.size) c = copy(c.mapdim(map));
3382
3383            coeffs.replace(keyT(key.level(),l), nodeT(c,node.has_children()));
3384        }
3385        if (fence) world.gop.fence();
3386    }
3387
3388    template <typename T, int NDIM>
3389    Future< Tensor<T> > FunctionImpl<T,NDIM>::compress_spawn(const Key<NDIM>& key, bool nonstandard, bool keepleaves) {
3390        PROFILE_MEMBER_FUNC(FunctionImpl);
3391        MADNESS_ASSERT(coeffs.probe(key));
3392        nodeT& node = coeffs.find(key).get()->second;
3393        if (node.has_children()) {
3394            std::vector< Future<tensorT> > v = future_vector_factory<tensorT>(1<<NDIM);
3395            int i=0;
3396            for (KeyChildIterator<NDIM> kit(key); kit; ++kit,++i) {
3397                PROFILE_BLOCK(compress_send);
3398                v[i] = task(coeffs.owner(kit.key()), &implT::compress_spawn, kit.key(), nonstandard, keepleaves, TaskAttributes::hipri());
3399            }
3400            return task(world.rank(),&implT::compress_op, key, v, nonstandard);
3401        }
3402        else {
3403            Future<tensorT> result(node.coeff());
3404            if (!keepleaves) node.clear_coeff();
3405            return result;
3406        }
3407    }
3408
3409    template <typename T, int NDIM>
3410    Tensor<T> FunctionImpl<T,NDIM>::eval_plot_cube(const coordT& plotlo,
3411                                                   const coordT& plothi,
3412                                                   const std::vector<long>& npt) const {
3413        PROFILE_MEMBER_FUNC(FunctionImpl);
3414        Tensor<T> r(NDIM, &npt[0]);
3415        //         r(___) = 99.0;
3416        MADNESS_ASSERT(!compressed);
3417
3418        coordT h; // Increment between points in each dimension
3419        for (int i=0; i<NDIM; i++) {
3420            if (npt[i] > 1) {
3421                h[i] = (plothi[i]-plotlo[i])/(npt[i]-1);
3422            }
3423            else {
3424                MADNESS_ASSERT(plotlo[i] == plothi[i]);
3425                h[i] = 0.0;
3426            }
3427        }
3428        //print("plot info", plotlo, plothi, npt, h);
3429
3430        // Loop thru local boxes ... THIS NEEDS MULTITHREADING !!!
3431        for(typename dcT::const_iterator it=coeffs.begin(); it!=coeffs.end(); ++it) {
3432            const keyT& key = it->first;
3433            const nodeT& node = it->second;
3434            if (node.has_coeff()) {
3435                //print("Looking at", key);
3436                // Determine the points, if any, of the plot grid that
3437                // are contained within this box
3438                coordT boxlo, boxhi;
3439                Vector<int,NDIM> boxnpt;
3440                double fac = pow(0.5,double(key.level()));
3441                int npttotal = 1;
3442                for (int d=0; d<NDIM; d++) {
3443                    // Coords of box
3444                    boxlo[d] = fac*key.translation()[d];
3445                    boxhi[d] = boxlo[d]+fac;
3446
3447                    if (boxlo[d] > plothi[d] || boxhi[d] < plotlo[d]) {
3448                        // Discard boxes out of the plot range
3449                        npttotal = boxnpt[d] = 0;
3450                        //print("OO range?");
3451                        break;
3452                    }
3453                    else if (npt[d] == 1) {
3454                        // This dimension is only a single point
3455                        boxlo[d] = boxhi[d] = plotlo[d];
3456                        boxnpt[d] = 1;
3457                    }
3458                    else {
3459                        // Restrict to plot range
3460//                         boxlo[d] = std::max(boxlo[d],plotlo[d]);
3461//                         boxhi[d] = std::min(boxhi[d],plothi[d]);
3462
3463                        // Round lo up to next plot point; round hi down
3464                        double xlo = long((boxlo[d]-plotlo[d])/h[d])*h[d] + plotlo[d];
3465                        if (xlo < boxlo[d]) xlo += h[d];
3466                        boxlo[d] =  xlo;
3467                        double xhi = long((boxhi[d]-plotlo[d])/h[d])*h[d] + plotlo[d];
3468                        if (xhi > boxhi[d]) xhi -= h[d];
3469                        // MADNESS_ASSERT(xhi >= xlo);  // nope
3470                        boxhi[d] = xhi;
3471                        boxnpt[d] = long(round((boxhi[d] - boxlo[d])/h[d])) + 1;
3472                    }
3473                    npttotal *= boxnpt[d];
3474                }
3475                //print("    box", boxlo, boxhi, boxnpt, npttotal);
3476                if (npttotal > 0) {
3477                    const tensorT& coeff = node.coeff();
3478                    const Level n = key.level();
3479                    const Vector<Translation,NDIM>& l = key.translation();
3480                    const double twon = pow(2.0,double(n));
3481                    long ind[NDIM];
3482                    coordT x;
3483                    for (IndexIterator it(boxnpt); it; ++it) {
3484                        for (int d=0; d<NDIM; d++) {
3485                            double xd = boxlo[d] + it[d]*h[d]; // Sim. coords of point
3486                            x[d] = twon*xd - l[d]; // Offset within box
3487                            MADNESS_ASSERT(x[d]>=0.0 && x[d] <=1.0);  // sanity
3488                            if (npt[d] > 1) {
3489                                ind[d] = long(round((xd-plotlo[d])/h[d])); // Index of plot point
3490                            }
3491                            else {
3492                                ind[d] = 0;
3493                            }
3494                            MADNESS_ASSERT(ind[d]>=0 && ind[d]<npt[d]); // sanity
3495                        }
3496                        r(ind) = eval_cube(n, x, coeff);
3497                        //print("computing", n, x, ind, r(ind));
3498                    }
3499                }
3500            }
3501        }
3502
3503        //        ITERATOR(r, if (r(IND) == 99.0) {print("BAD", IND); error("bad",0);});
3504
3505        return r;
3506    }
3507
3508    static void dxprintvalue(FILE* f, const double t) {
3509        fprintf(f,"%.6e\n",t);
3510    }
3511
3512    static void dxprintvalue(FILE* f, const double_complex& t) {
3513        fprintf(f,"%.6e %.6e\n", t.real(), t.imag());
3514    }
3515
3516    template <typename T, int NDIM>
3517    void plotdx(const Function<T,NDIM>& function,
3518                const char* filename,
3519                const Tensor<double>& cell,
3520                const std::vector<long>& npt,
3521                bool binary) {
3522        PROFILE_FUNC;
3523        MADNESS_ASSERT(NDIM<=6);
3524        const char* element[6] = {"lines","quads","cubes","cubes4D","cubes5D","cubes6D"};
3525
3526        function.verify();
3527        World& world = const_cast< Function<T,NDIM>& >(function).world();
3528        FILE *f=0;
3529        if (world.rank() == 0) {
3530            f = fopen(filename, "w");
3531            if (!f) MADNESS_EXCEPTION("plotdx: failed to open the plot file", 0);
3532
3533            fprintf(f,"object 1 class gridpositions counts ");
3534            for (int d=0; d<NDIM; d++) fprintf(f," %ld",npt[d]);
3535            fprintf(f,"\n");
3536
3537            fprintf(f,"origin ");
3538            for (int d=0; d<NDIM; d++) fprintf(f, " %.6e", cell(d,0));
3539            fprintf(f,"\n");
3540
3541            for (int d=0; d<NDIM; d++) {
3542                fprintf(f,"delta ");
3543                for (int c=0; c<d; c++) fprintf(f, " 0");
3544                double h = 0.0;
3545                if (npt[d]>1) h = (cell(d,1)-cell(d,0))/(npt[d]-1);
3546                fprintf(f," %.6e", h);
3547                for (int c=d+1; c<NDIM; c++) fprintf(f, " 0");
3548                fprintf(f,"\n");
3549            }
3550            fprintf(f,"\n");
3551
3552            fprintf(f,"object 2 class gridconnections counts ");
3553            for (int d=0; d<NDIM; d++) fprintf(f," %ld",npt[d]);
3554            fprintf(f,"\n");
3555            fprintf(f, "attribute \"element type\" string \"%s\"\n", element[NDIM-1]);
3556            fprintf(f, "attribute \"ref\" string \"positions\"\n");
3557            fprintf(f,"\n");
3558
3559            int npoint = 1;
3560            for (int d=0; d<NDIM; d++) npoint *= npt[d];
3561            const char* iscomplex = "";
3562            if (TensorTypeData<T>::iscomplex) iscomplex = "category complex";
3563            const char* isbinary = "";
3564            if (binary) isbinary = "binary";
3565            fprintf(f,"object 3 class array type double %s rank 0 items %d %s data follows\n",
3566                    iscomplex, npoint, isbinary);
3567        }
3568
3569        world.gop.fence();
3570        Tensor<T> r = function.eval_cube(cell, npt);
3571
3572        if (world.rank() == 0) {
3573            if (binary) {
3574                // This assumes that the values are double precision
3575                fflush(f);
3576                fwrite((void *) r.ptr(), sizeof(T), r.size, f);
3577                fflush(f);
3578            }
3579            else {
3580                for (IndexIterator it(npt); it; ++it) {
3581                    //fprintf(f,"%.6e\n",r(*it));
3582                    dxprintvalue(f,r(*it));
3583                }
3584            }
3585            fprintf(f,"\n");
3586
3587            fprintf(f,"object \"%s\" class field\n",filename);
3588            fprintf(f,"component \"positions\" value 1\n");
3589            fprintf(f,"component \"connections\" value 2\n");
3590            fprintf(f,"component \"data\" value 3\n");
3591            fprintf(f,"\nend\n");
3592            fclose(f);
3593        }
3594        world.gop.fence();
3595    }
3596
3597    template <int NDIM>
3598    void FunctionDefaults<NDIM>::set_defaults (World& world) {
3599            k = 7;
3600            thresh = 1e-5;
3601            initial_level = 2;
3602            max_refine_level = 30;
3603            truncate_mode = 0;
3604            refine = true;
3605            autorefine = true;
3606            debug = false;
3607            truncate_on_project = false;
3608            apply_randomize = false;
3609            project_randomize = false;
3610            bc = Tensor<int>(NDIM,2);
3611            cell = Tensor<double>(NDIM,2);
3612            cell(_,1) = 1.0;
3613            recompute_cell_info();
3614
3615            //pmap = SharedPtr< WorldDCPmapInterface< Key<NDIM> > >(new WorldDCDefaultPmap< Key<NDIM> >(world));
3616            pmap = SharedPtr< WorldDCPmapInterface< Key<NDIM> > >(new MyPmap<NDIM>(world));
3617            //pmap = SharedPtr< WorldDCPmapInterface< Key<NDIM> > >(new SimpleMap< Key<NDIM> >(world));
3618        }
3619
3620
3621    //
3622    // Below here we instantiate templates defined in this file
3623    //
3624
3625
3626    template <typename T, int NDIM>
3627    FunctionCommonData<T,NDIM> FunctionCommonData<T,NDIM>::data[MAXK+1];
3628
3629    template <int NDIM> int FunctionDefaults<NDIM>::k;
3630    template <int NDIM> double FunctionDefaults<NDIM>::thresh;
3631    template <int NDIM> int FunctionDefaults<NDIM>::initial_level;
3632    template <int NDIM> int FunctionDefaults<NDIM>::max_refine_level;
3633    template <int NDIM> int FunctionDefaults<NDIM>::truncate_mode;
3634    template <int NDIM> bool FunctionDefaults<NDIM>::refine;
3635    template <int NDIM> bool FunctionDefaults<NDIM>::autorefine;
3636    template <int NDIM> bool FunctionDefaults<NDIM>::debug;
3637    template <int NDIM> bool FunctionDefaults<NDIM>::truncate_on_project;
3638    template <int NDIM> bool FunctionDefaults<NDIM>::apply_randomize;
3639    template <int NDIM> bool FunctionDefaults<NDIM>::project_randomize;
3640    template <int NDIM> Tensor<int> FunctionDefaults<NDIM>::bc;
3641    template <int NDIM> Tensor<double> FunctionDefaults<NDIM>::cell;
3642    template <int NDIM> Tensor<double> FunctionDefaults<NDIM>::cell_width;
3643    template <int NDIM> Tensor<double> FunctionDefaults<NDIM>::rcell_width;
3644    template <int NDIM> double FunctionDefaults<NDIM>::cell_volume;
3645    template <int NDIM> double FunctionDefaults<NDIM>::cell_min_width;
3646    template <int NDIM> SharedPtr< WorldDCPmapInterface< Key<NDIM> > > FunctionDefaults<NDIM>::pmap;
3647
3648    template <int NDIM> std::vector< Key<NDIM> > Displacements<NDIM>::disp;
3649    template <int NDIM> std::vector< Key<NDIM> > Displacements<NDIM>::disp_periodicsum[64];
3650
3651#ifdef FUNCTION_INSTANTIATE_1
3652    template class FunctionDefaults<1>;
3653    template class Function<double, 1>;
3654    template class Function<std::complex<double>, 1>;
3655    template class FunctionImpl<double, 1>;
3656    template class FunctionImpl<std::complex<double>, 1>;
3657    template class FunctionCommonData<double, 1>;
3658    template class FunctionCommonData<double_complex, 1>;
3659    template class Displacements<1>;
3660
3661    template void plotdx<double,1>(const Function<double,1>&, const char*, const Tensor<double>&,
3662                                   const std::vector<long>&, bool binary);
3663    template void plotdx<double_complex,1>(const Function<double_complex,1>&, const char*, const Tensor<double>&,
3664                                           const std::vector<long>&, bool binary);
3665#endif
3666
3667#ifdef FUNCTION_INSTANTIATE_2
3668    template class FunctionDefaults<2>;
3669    template class Function<double, 2>;
3670    template class Function<std::complex<double>, 2>;
3671    template class FunctionImpl<double, 2>;
3672    template class FunctionImpl<std::complex<double>, 2>;
3673    template class FunctionCommonData<double, 2>;
3674    template class FunctionCommonData<double_complex, 2>;
3675    template class Displacements<2>;
3676
3677    template void plotdx<double,2>(const Function<double,2>&, const char*, const Tensor<double>&,
3678                                   const std::vector<long>&, bool binary);
3679    template void plotdx<double_complex,2>(const Function<double_complex,2>&, const char*, const Tensor<double>&,
3680                                   const std::vector<long>&, bool binary);
3681#endif
3682
3683#ifdef FUNCTION_INSTANTIATE_3
3684    template class FunctionDefaults<3>;
3685    template class Function<double, 3>;
3686    template class Function<std::complex<double>, 3>;
3687    template class FunctionImpl<double, 3>;
3688    template class FunctionImpl<std::complex<double>, 3>;
3689    template class FunctionCommonData<double, 3>;
3690    template class FunctionCommonData<double_complex, 3>;
3691    template class Displacements<3>;
3692
3693    template void plotdx<double,3>(const Function<double,3>&, const char*, const Tensor<double>&,
3694                                   const std::vector<long>&, bool binary);
3695    template void plotdx<double_complex,3>(const Function<double_complex,3>&, const char*, const Tensor<double>&,
3696                                   const std::vector<long>&, bool binary);
3697#endif
3698
3699#ifdef FUNCTION_INSTANTIATE_4
3700    template class FunctionDefaults<4>;
3701    template class Function<double, 4>;
3702    template class Function<std::complex<double>, 4>;
3703    template class FunctionImpl<double, 4>;
3704    template class FunctionImpl<std::complex<double>, 4>;
3705    template class FunctionCommonData<double, 4>;
3706    template class FunctionCommonData<double_complex, 4>;
3707    template class Displacements<4>;
3708
3709    template void plotdx<double,4>(const Function<double,4>&, const char*, const Tensor<double>&,
3710                                   const std::vector<long>&, bool binary);
3711    template void plotdx<double_complex,4>(const Function<double_complex,4>&, const char*, const Tensor<double>&,
3712                                   const std::vector<long>&, bool binary);
3713#endif
3714
3715#ifdef FUNCTION_INSTANTIATE_5
3716    template class FunctionDefaults<5>;
3717    template class Function<double, 5>;
3718    template class Function<std::complex<double>, 5>;
3719    template class FunctionImpl<double, 5>;
3720    template class FunctionImpl<std::complex<double>, 5>;
3721    template class FunctionCommonData<double, 5>;
3722    template class FunctionCommonData<double_complex, 5>;
3723    template class Displacements<5>;
3724
3725    template void plotdx<double,5>(const Function<double,5>&, const char*, const Tensor<double>&,
3726                                   const std::vector<long>&, bool binary);
3727    template void plotdx<double_complex,5>(const Function<double_complex,5>&, const char*, const Tensor<double>&,
3728                                   const std::vector<long>&, bool binary);
3729#endif
3730
3731#ifdef FUNCTION_INSTANTIATE_6
3732    template class FunctionDefaults<6>;
3733    template class Function<double, 6>;
3734    template class Function<std::complex<double>, 6>;
3735    template class FunctionImpl<double, 6>;
3736    template class FunctionImpl<std::complex<double>, 6>;
3737    template class FunctionCommonData<double, 6>;
3738    template class FunctionCommonData<double_complex, 6>;
3739    template class Displacements<6>;
3740
3741    template void plotdx<double,6>(const Function<double,6>&, const char*, const Tensor<double>&,
3742                                   const std::vector<long>&, bool binary);
3743    template void plotdx<double_complex,6>(const Function<double_complex,6>&, const char*, const Tensor<double>&,
3744                                   const std::vector<long>&, bool binary);
3745#endif
3746
3747    template <>
3748    ConcurrentHashMap< double, SharedPtr< GaussianConvolution1D<double> > >  GaussianConvolution1DCache<double>::map = ConcurrentHashMap< double, SharedPtr< GaussianConvolution1D<double> > >();
3749
3750    template <>
3751    ConcurrentHashMap< double, SharedPtr< GaussianConvolution1D<double_complex> > > GaussianConvolution1DCache<double_complex>::map = ConcurrentHashMap< double, SharedPtr< GaussianConvolution1D<double_complex> > >();
3752}
3753
3754/// Quietly used as a global lock when looking for bugs with multiple threads
3755madness::Mutex THELOCK;
3756
3757