1 ////////////////////////////////////////////////////////////////////////////////
2 //
3 // Copyright (c) 2008 The Regents of the University of California
4 //
5 // This file is part of Qbox
6 //
7 // Qbox is distributed under the terms of the GNU General Public License
8 // as published by the Free Software Foundation, either version 2 of
9 // the License, or (at your option) any later version.
10 // See the file COPYING in the root directory of this distribution
11 // or <http://www.gnu.org/licenses/>.
12 //
13 ////////////////////////////////////////////////////////////////////////////////
14 //
15 // Basis.cpp
16 //
17 ////////////////////////////////////////////////////////////////////////////////
18 
19 #include "Basis.h"
20 #include <cmath>
21 #include <cassert>
22 #include <cstdlib>
23 #include <vector>
24 #include <set>
25 #include <algorithm>
26 #include <functional>
27 #include <iostream>
28 #include <iomanip>
29 using namespace std;
30 
31 ////////////////////////////////////////////////////////////////////////////////
localmemsize(void) const32 double Basis::localmemsize(void) const
33 {
34   return
35   5.0 * (npes_*nrods_*sizeof(int)) // x[ipe][irod]
36   + localsize_[mype_] * (3.0*sizeof(int) + 10 * sizeof(double));
37 }
memsize(void) const38 double Basis::memsize(void) const { return npes_*localmemsize(); }
39 
comm(void) const40 MPI_Comm Basis::comm(void) const { return comm_; }
41 
cell() const42 const UnitCell& Basis::cell() const { return cell_; }
refcell() const43 const UnitCell& Basis::refcell() const { return refcell_; }
idxmin(int i) const44 int Basis::idxmin(int i) const { return idxmin_[i]; }
idxmax(int i) const45 int Basis::idxmax(int i) const { return idxmax_[i]; }
ecut() const46 double Basis::ecut() const { return ecut_; }
47 
size() const48 int Basis::size() const { return size_; }
localsize() const49 int Basis::localsize() const { return localsize_[mype_]; }
localsize(int ipe) const50 int Basis::localsize(int ipe) const { return localsize_[ipe]; }
maxlocalsize() const51 int Basis::maxlocalsize() const { return maxlocalsize_; }
minlocalsize() const52 int Basis::minlocalsize() const { return minlocalsize_; }
53 
nrods() const54 int Basis::nrods() const { return nrods_; }
nrod_loc() const55 int Basis::nrod_loc() const { return nrod_loc_[mype_]; }
nrod_loc(int ipe) const56 int Basis::nrod_loc(int ipe) const { return nrod_loc_[ipe]; }
57 
rod_h(int irod) const58 int Basis::rod_h(int irod) const
59 { return rod_h_[mype_][irod]; }
rod_h(int ipe,int irod) const60 int Basis::rod_h(int ipe, int irod) const
61 { return rod_h_[ipe][irod]; }
62 
rod_k(int irod) const63 int Basis::rod_k(int irod) const { return rod_k_[mype_][irod]; }
rod_k(int ipe,int irod) const64 int Basis::rod_k(int ipe, int irod) const { return rod_k_[ipe][irod]; }
65 
rod_lmin(int irod) const66 int Basis::rod_lmin(int irod) const { return rod_lmin_[mype_][irod]; }
rod_lmin(int ipe,int irod) const67 int Basis::rod_lmin(int ipe, int irod) const { return rod_lmin_[ipe][irod]; }
68 
69 // size of rod irod on current process
rod_size(int irod) const70 int Basis::rod_size(int irod) const { return rod_size_[mype_][irod]; }
rod_size(int ipe,int irod) const71 int Basis::rod_size(int ipe, int irod) const { return rod_size_[ipe][irod]; }
72 
73 // position of first elem. of rod irod in the local list of g vectors
rod_first(int irod) const74 int Basis::rod_first(int irod) const { return rod_first_[mype_][irod]; }
rod_first(int ipe,int irod) const75 int Basis::rod_first(int ipe, int irod) const { return rod_first_[ipe][irod]; }
76 
idx(int i) const77 int    Basis::idx(int i) const   { return idx_[i]; }
g(int i) const78 double Basis::g(int i) const     { return g_[i]; }
kpg(int i) const79 double Basis::kpg(int i) const   { return kpg_[i]; }
gi(int i) const80 double Basis::gi(int i) const    { return gi_[i]; }
kpgi(int i) const81 double Basis::kpgi(int i) const  { return kpgi_[i]; }
g2(int i) const82 double Basis::g2(int i) const    { return g2_[i]; }
kpg2(int i) const83 double Basis::kpg2(int i) const  { return kpg2_[i]; }
g2i(int i) const84 double Basis::g2i(int i) const   { return g2i_[i]; }
kpg2i(int i) const85 double Basis::kpg2i(int i) const { return kpg2i_[i]; }
gx(int i) const86 double Basis::gx(int i) const    { return gx_[i]; }
kpgx(int i) const87 double Basis::kpgx(int i) const  { return kpgx_[i]; }
isort(int i) const88 int    Basis::isort(int i) const { return isort_loc[i]; }
89 
idx_ptr(void) const90 const int*    Basis::idx_ptr(void) const   { return &(idx_[0]); }
g_ptr(void) const91 const double* Basis::g_ptr(void)  const    { return &(g_[0]); }
kpg_ptr(void) const92 const double* Basis::kpg_ptr(void)  const  { return &(kpg_[0]); }
gi_ptr(void) const93 const double* Basis::gi_ptr(void) const    { return &(gi_[0]); }
kpgi_ptr(void) const94 const double* Basis::kpgi_ptr(void) const  { return &(kpgi_[0]); }
g2_ptr(void) const95 const double* Basis::g2_ptr(void) const    { return &(g2_[0]); }
kpg2_ptr(void) const96 const double* Basis::kpg2_ptr(void) const  { return &(kpg2_[0]); }
g2i_ptr(void) const97 const double* Basis::g2i_ptr(void) const   { return &(g2i_[0]); }
kpg2i_ptr(void) const98 const double* Basis::kpg2i_ptr(void) const { return &(kpg2i_[0]); }
gx_ptr(int j) const99 const double* Basis::gx_ptr(int j) const
100 { return &(gx_[j*localsize_[mype_]]); }
kpgx_ptr(int j) const101 const double* Basis::kpgx_ptr(int j) const
102 { return &(kpgx_[j*localsize_[mype_]]); }
103 
104 ////////////////////////////////////////////////////////////////////////////////
factorizable(int n) const105 bool Basis::factorizable(int n) const
106 {
107   // next lines: use AIX criterion for all platforms (AIX and fftw)
108 
109 //#if AIX
110 
111   // Acceptable lengths for FFTs in the ESSL library:
112   // n = (2^h) (3^i) (5^j) (7^k) (11^m) for n <= 37748736
113   // where:
114   //  h = 1, 2, ..., 25
115   //  i = 0, 1, 2
116   //  j, k, m = 0, 1
117   if ( n % 11 == 0 ) n /= 11;
118   if ( n % 7 == 0 ) n /= 7;
119   if ( n % 5 == 0 ) n /= 5;
120   if ( n % 3 == 0 ) n /= 3;
121   if ( n % 3 == 0 ) n /= 3;
122   // the bound h <= 25 is not tested since 2^25 would cause
123   // memory allocation problems
124   while ( ( n % 2 == 0 ) ) n /= 2;
125   return ( n == 1 );
126 
127 // #else
128 //   while ( n % 5 == 0 ) n /= 5;
129 //   while ( n % 3 == 0 ) n /= 3;
130 //   while ( n % 2 == 0 ) n /= 2;
131 //   return ( n == 1 );
132 // #endif
133 }
134 
135 ////////////////////////////////////////////////////////////////////////////////
np(int i) const136 int Basis::np(int i) const { return np_[i]; }
137 
138 ////////////////////////////////////////////////////////////////////////////////
fits_in_grid(int np0,int np1,int np2) const139 bool Basis::fits_in_grid(int np0, int np1, int np2) const
140 { return
141   ( idxmax_[0] < np0/2 ) && ( idxmin_[0] >= -np0/2 ) &&
142   ( idxmax_[1] < np1/2 ) && ( idxmin_[1] >= -np1/2 ) &&
143   ( idxmax_[2] < np2/2 ) && ( idxmin_[2] >= -np2/2 );
144 }
145 
146 ////////////////////////////////////////////////////////////////////////////////
kpoint(void) const147 const D3vector Basis::kpoint(void) const { return kpoint_; }
148 
149 ////////////////////////////////////////////////////////////////////////////////
real(void) const150 bool Basis::real(void) const { return real_; }
151 
sqr(double x)152 inline double sqr( double x ) { return x*x; }
swap(int & x,int & y)153 inline void swap(int &x, int &y) { int tmp = x; x = y; y = tmp; }
154 
155 ////////////////////////////////////////////////////////////////////////////////
156 class Rod
157 {
158   // z-column of non-zero reciprocal lattice vectors
159   int h_, k_, lmin_, size_;
160 
161   public:
Rod(int h,int k,int lmin,int size)162   Rod(int h, int k, int lmin, int size) : h_(h), k_(k),
163   lmin_(lmin), size_(size) {}
164 
h(void) const165   int h(void) const { return h_; }
k(void) const166   int k(void) const { return k_; }
lmin(void) const167   int lmin(void) const { return lmin_; }
size(void) const168   int size(void) const { return size_; }
operator <(const Rod & x) const169   bool operator< (const Rod& x) const
170   {
171     return size_ < x.size();
172   }
173 };
174 
175 ////////////////////////////////////////////////////////////////////////////////
176 class Node
177 {
178   int id_, nrods_, size_;
179 
180   public:
Node()181   Node() : id_(0), nrods_(0), size_(0) {}
Node(int id)182   Node(int id) : id_(id), nrods_(0), size_(0) {}
183 
id(void) const184   int id(void) const { return id_; }
nrods(void) const185   int nrods(void) const { return nrods_; }
size(void) const186   int size(void) const { return size_; }
187 
addrod(const Rod & r)188   void addrod(const Rod& r)
189   {
190     nrods_++;
191     size_ += r.size();
192   }
operator <(const Node & x) const193   bool operator< (const Node& x) const
194   {
195     return size_ < x.size();
196   }
197 };
198 
199 ////////////////////////////////////////////////////////////////////////////////
200 template <class T>
201 struct ptr_less
202 {
203   public:
operator ()ptr_less204   bool operator() ( T* x, T* y ) { return *x < *y; }
205 };
206 template <class T>
207 struct ptr_greater
208 {
209   public:
operator ()ptr_greater210   bool operator() ( T* x, T* y ) { return *y < *x; }
211 };
212 
213 ////////////////////////////////////////////////////////////////////////////////
214 template <class T>
215 struct VectorLess
216 {
217   // function object for indirect comparison of vector elements
218   public:
219   vector<T>& a_;
220   VectorLess<T>(vector<T>& a) : a_(a) {};
operator ()VectorLess221   bool operator() (int i, int j) const
222   {
223     return a_[i] < a_[j];
224   }
225 };
226 
227 ////////////////////////////////////////////////////////////////////////////////
Basis(MPI_Comm comm,D3vector kpoint)228 Basis::Basis(MPI_Comm comm, D3vector kpoint) : comm_(comm)
229 {
230   // Construct the default empty basis
231   // cell and refcell are (0,0,0)
232   MPI_Comm_rank(comm_,&mype_);
233   MPI_Comm_size(comm_,&npes_);
234 
235   ecut_ = 0.0;
236   kpoint_ = kpoint;
237   real_ = ( kpoint == D3vector(0.0,0.0,0.0) );
238 
239   localsize_.resize(npes_);
240   nrod_loc_.resize(npes_);
241   rod_h_.resize(npes_);
242   rod_k_.resize(npes_);
243   rod_lmin_.resize(npes_);
244   rod_size_.resize(npes_);
245   rod_first_.resize(npes_);
246 
247   // resize with zero cutoff to initialize empty Basis
248   resize(cell_,refcell_,0.0);
249 }
250 
251 ////////////////////////////////////////////////////////////////////////////////
~Basis(void)252 Basis::~Basis(void) {}
253 
254 ////////////////////////////////////////////////////////////////////////////////
resize(const UnitCell & cell,const UnitCell & refcell,double ecut)255 bool Basis::resize(const UnitCell& cell, const UnitCell& refcell,
256   double ecut)
257 {
258   assert(ecut>=0.0);
259   assert(cell.volume() >= 0.0);
260   assert(refcell.volume() >= 0.0);
261 
262   if ( ecut == ecut_ && refcell == refcell_ && refcell_.volume() != 0.0 )
263   {
264     cell_ = cell;
265     // only the cell changes, ecut and the refcell remain unchanged
266     update_g();
267     return true;
268   }
269 
270   ecut_ = ecut;
271   cell_ = cell;
272   refcell_ = refcell;
273 
274   if ( ecut == 0.0 || cell.volume() == 0.0)
275   {
276     idxmax_[0] = 0;
277     idxmax_[1] = 0;
278     idxmax_[2] = 0;
279     idxmin_[0] = 0;
280     idxmin_[1] = 0;
281     idxmin_[2] = 0;
282 
283     size_ = 0;
284     nrods_ = 0;
285     for ( int ipe = 0; ipe < npes_; ipe++ )
286     {
287       localsize_[ipe] = 0;
288       nrod_loc_[ipe] = 0;
289     }
290     maxlocalsize_ = minlocalsize_ = 0;
291     np_[0] = np_[1] = np_[2] = 0;
292     idx_.resize(3*localsize_[mype_]);
293     g_.resize(localsize_[mype_]);
294     kpg_.resize(localsize_[mype_]);
295     gi_.resize(localsize_[mype_]);
296     kpgi_.resize(localsize_[mype_]);
297     g2_.resize(localsize_[mype_]);
298     kpg2_.resize(localsize_[mype_]);
299     g2i_.resize(localsize_[mype_]);
300     kpg2i_.resize(localsize_[mype_]);
301     gx_.resize(3*localsize_[mype_]);
302     kpgx_.resize(3*localsize_[mype_]);
303     isort_loc.resize(localsize_[mype_]);
304     return true;
305   }
306 
307   const double two_ecut = 2.0 * ecut;
308   const double twopi = 2.0 * M_PI;
309 
310   const double kpx = kpoint_.x;
311   const double kpy = kpoint_.y;
312   const double kpz = kpoint_.z;
313 
314   UnitCell defcell;
315   // defcell: cell used to define which vectors are contained in the Basis
316   // if refcell is defined, defcell = refcell
317   // otherwise, defcell = cell
318   if ( norm2(refcell.b(0)) + norm2(refcell.b(1)) + norm2(refcell.b(2)) == 0.0 )
319   {
320     defcell = cell;
321   }
322   else
323   {
324     defcell = refcell;
325   }
326 
327   const D3vector b0 = defcell.b(0);
328   const D3vector b1 = defcell.b(1);
329   const D3vector b2 = defcell.b(2);
330 
331   const double normb2 = norm2(b2);
332   const double b2inv2 = 1.0 / normb2;
333 
334   const D3vector kp = kpx*b0 + kpy*b1 + kpz*b2;
335 
336   if ( !cell.in_bz(kp) )
337   {
338     if ( mype_ == 0 )
339       cout << " Basis::resize: warning: " << kpoint_
340            << " out of the BZ: " << kp << endl;
341   }
342 
343   const double fac = sqrt(two_ecut) / twopi;
344 
345   // define safe enclosing domain for any k-point value in the BZ
346   const int hmax = (int) ( 1.5 + fac * ( length(defcell.a(0) ) ) );
347   const int hmin = - hmax;
348 
349   const int kmax = (int) ( 1.5 + fac * ( length(defcell.a(1) ) ) );
350   const int kmin = - kmax;
351 
352   const int lmax = (int) ( 1.5 + fac * ( length(defcell.a(2) ) ) );
353   const int lmin = - lmax;
354 
355   multiset<Rod> rodset;
356 
357   // build rod set
358 
359   int hmax_used = hmin;
360   int hmin_used = hmax;
361   int kmax_used = kmin;
362   int kmin_used = kmax;
363   int lmax_used = lmin;
364   int lmin_used = lmax;
365 
366   if ( real_ )
367   {
368     // build basis at kpoint (0,0,0)
369 
370     // rod(0,0,0)
371     // length of rod(0,0,0) is lend+1
372     int lend = (int) ( sqrt(two_ecut * b2inv2) );
373     size_ = lend + 1;
374     rodset.insert(Rod(0,0,0,lend+1));
375     nrods_ = 1;
376 
377     hmax_used = 0;
378     hmin_used = 0;
379     kmin_used = 0;
380     kmax_used = 0;
381     lmin_used = 0;
382     lmax_used = lend;
383 
384     // rods (0,k,l)
385     for ( int k = 1; k <= kmax; k++ )
386     {
387       int lstart=lmax,lend=lmin;
388       bool found = false;
389       for ( int l = lmin; l <= lmax; l++ )
390       {
391         const double two_e = norm2(k*b1+l*b2);
392         if ( two_e < two_ecut )
393         {
394           lstart = min(l,lstart);
395           lend = max(l,lend);
396           found = true;
397         }
398       }
399       if ( found )
400       {
401         // non-zero intersection was found
402         const int rodsize = lend - lstart + 1;
403         size_ += rodsize;
404         rodset.insert(Rod(0,k,lstart,rodsize));
405         nrods_++;
406         kmax_used = max(k,kmax_used);
407         kmin_used = min(k,kmin_used);
408         lmax_used = max(lend,lmax_used);
409         lmin_used = min(lstart,lmin_used);
410       }
411     }
412     // rods (h,k,l) h>0
413     for ( int h = 1; h <= hmax; h++ )
414     {
415       for ( int k = kmin; k <= kmax; k++ )
416       {
417         int lstart=lmax,lend=lmin;
418         bool found = false;
419         for ( int l = lmin; l <= lmax; l++ )
420         {
421           const double two_e = norm2(h*b0+k*b1+l*b2);
422           if ( two_e < two_ecut )
423           {
424             lstart = min(l,lstart);
425             lend = max(l,lend);
426             found = true;
427           }
428         }
429         if ( found )
430         {
431           // non-zero intersection was found
432           const int rodsize = lend - lstart + 1;
433           size_ += rodsize;
434           rodset.insert(Rod(h,k,lstart,rodsize));
435           nrods_++;
436           hmax_used = max(h,hmax_used);
437           hmin_used = min(h,hmin_used);
438           kmax_used = max(k,kmax_used);
439           kmin_used = min(k,kmin_used);
440           lmax_used = max(lend,lmax_used);
441           lmin_used = min(lstart,lmin_used);
442         }
443       }
444     }
445   }
446   else
447   {
448     // build Basis for k != (0,0,0)
449 
450     size_ = 0;
451     nrods_ = 0;
452     // rods (h,k,l)
453     for ( int h = hmin; h <= hmax; h++ )
454     {
455       for ( int k = kmin; k <= kmax; k++ )
456       {
457         int lstart=lmax,lend=lmin;
458         bool found = false;
459         for ( int l = lmin; l <= lmax; l++ )
460         {
461           const double two_e = norm2((kpx+h)*b0 + (kpy+k)*b1 + (kpz+l)*b2);
462           if ( two_e < two_ecut )
463           {
464             lstart = min(l,lstart);
465             lend = max(l,lend);
466             found = true;
467           }
468         }
469         if ( found )
470         {
471           // non-zero intersection was found
472           const int rodsize = lend - lstart + 1;
473           size_ += rodsize;
474           rodset.insert(Rod(h,k,lstart,rodsize));
475           nrods_++;
476           hmax_used = max(h,hmax_used);
477           hmin_used = min(h,hmin_used);
478           kmax_used = max(k,kmax_used);
479           kmin_used = min(k,kmin_used);
480           lmax_used = max(lend,lmax_used);
481           lmin_used = min(lstart,lmin_used);
482         }
483       }
484     }
485   }
486 
487 #if DEBUG
488   cout << " hmin/hmax: " << hmin << " / " << hmax << endl;
489   cout << " kmin/kmax: " << kmin << " / " << kmax << endl;
490   cout << " lmin/lmax: " << lmin << " / " << lmax << endl;
491   cout << " hmin/hmax used: " << hmin_used << " / " << hmax_used << endl;
492   cout << " kmin/kmax used: " << kmin_used << " / " << kmax_used << endl;
493   cout << " lmin/lmax used: " << lmin_used << " / " << lmax_used << endl;
494 #endif
495 
496   idxmax_[0] = hmax_used;
497   idxmin_[0] = hmin_used;
498 
499   idxmax_[1] = kmax_used;
500   idxmin_[1] = kmin_used;
501 
502   idxmax_[2] = lmax_used;
503   idxmin_[2] = lmin_used;
504 
505   assert(hmax_used - hmin_used + 1 <= 2 * hmax);
506   assert(kmax_used - kmin_used + 1 <= 2 * kmax);
507   assert(lmax_used - lmin_used + 1 <= 2 * lmax);
508 
509   // compute good FFT sizes
510   // use values independent of the kpoint
511   int n;
512   n = 2 * hmax;
513   while ( !factorizable(n) ) n+=2;
514   np_[0] = n;
515   n = 2 * kmax;
516   while ( !factorizable(n) ) n+=2;
517   np_[1] = n;
518   n = 2 * lmax;
519   while ( !factorizable(n) ) n+=2;
520   np_[2] = n;
521 
522   // Distribute the basis on npes_ processors
523 
524   // build a min-heap of Nodes
525 
526   vector<Node*> nodes(npes_);
527 
528   for ( int ipe = 0; ipe < npes_; ipe++ )
529   {
530     nodes[ipe] = new Node(ipe);
531     localsize_[ipe] = 0;
532     nrod_loc_[ipe] = 0;
533     rod_h_[ipe].resize(0);
534     rod_k_[ipe].resize(0);
535     rod_lmin_[ipe].resize(0);
536     rod_size_[ipe].resize(0);
537   }
538 
539   // nodes contains a valid min-heap of zero-size Nodes
540 
541   // insert rods into the min-heap
542   // keep track of where rod(0,0,0) goes
543   int pe_rod0 = -1, rank_rod0 = -1;
544   multiset<Rod>::iterator p = rodset.begin();
545   while ( p != rodset.end() )
546   {
547     // pop smallest element
548     pop_heap(nodes.begin(), nodes.end(), ptr_greater<Node>());
549 
550     // add rod size to smaller element
551     nodes[npes_-1]->addrod(*p);
552     int ipe = nodes[npes_-1]->id();
553 
554     // update info about rod on process ipe
555     nrod_loc_[ipe]++;
556     rod_h_[ipe].push_back(p->h());
557     rod_k_[ipe].push_back(p->k());
558     rod_lmin_[ipe].push_back(p->lmin());
559     rod_size_[ipe].push_back(p->size());
560     localsize_[ipe] += p->size();
561     if ( p->h() == 0 && p->k() == 0 )
562     {
563       pe_rod0 = ipe;
564       rank_rod0 = nodes[npes_-1]->nrods()-1;
565     }
566 
567     // push modified element back in the heap
568     push_heap(nodes.begin(), nodes.end(), ptr_greater<Node>());
569 
570     p++;
571   }
572 
573   maxlocalsize_ = (*max_element(nodes.begin(), nodes.end(),
574     ptr_less<Node>()))->size();
575   minlocalsize_ = (*min_element(nodes.begin(), nodes.end(),
576     ptr_less<Node>()))->size();
577 
578   for ( int ipe = 0; ipe < npes_; ipe++ )
579   {
580     delete nodes[ipe];
581   }
582 
583   // swap node pe_rod0 with node 0 in order to have rod(0,0,0) on node 0
584   swap(nrod_loc_[0], nrod_loc_[pe_rod0]);
585   rod_h_[pe_rod0].swap(rod_h_[0]);
586   rod_k_[pe_rod0].swap(rod_k_[0]);
587   rod_lmin_[pe_rod0].swap(rod_lmin_[0]);
588   rod_size_[pe_rod0].swap(rod_size_[0]);
589   swap(localsize_[0], localsize_[pe_rod0]);
590   //Node *tmpnodeptr = nodes[0]; nodes[0] = nodes[pe_rod0];
591   //  nodes[pe_rod0]=tmpnodeptr;
592 
593   // reorder rods on node 0 so that rod(0,0,0) comes first
594   swap(rod_h_[0][rank_rod0], rod_h_[0][0]);
595   swap(rod_k_[0][rank_rod0], rod_k_[0][0]);
596   swap(rod_lmin_[0][rank_rod0], rod_lmin_[0][0]);
597   swap(rod_size_[0][rank_rod0], rod_size_[0][0]);
598 
599   // compute position of first element of rod (ipe,irod)
600   for ( int ipe = 0; ipe < npes_; ipe++ )
601   {
602     rod_first_[ipe].resize(nrod_loc_[ipe]);
603     if ( nrod_loc_[ipe] > 0 )
604       rod_first_[ipe][0] = 0;
605     for ( int irod = 1; irod < nrod_loc_[ipe]; irod++ )
606     {
607       rod_first_[ipe][irod] = rod_first_[ipe][irod-1] + rod_size_[ipe][irod-1];
608     }
609   }
610 
611   // local arrays idx, g, gi, g2i, g2, gx
612   idx_.resize(3*localsize_[mype_]);
613   int i = 0;
614   for ( int irod = 0; irod < nrod_loc_[mype_]; irod++ )
615   {
616     for ( int l = 0; l < rod_size_[mype_][irod]; l++ )
617     {
618       idx_[3*i]   = rod_h_[mype_][irod];
619       idx_[3*i+1] = rod_k_[mype_][irod];
620       idx_[3*i+2] = rod_lmin_[mype_][irod] + l;
621 
622       i++;
623     }
624   }
625 
626   g_.resize(localsize_[mype_]);
627   kpg_.resize(localsize_[mype_]);
628   gi_.resize(localsize_[mype_]);
629   kpgi_.resize(localsize_[mype_]);
630   g2_.resize(localsize_[mype_]);
631   kpg2_.resize(localsize_[mype_]);
632   g2i_.resize(localsize_[mype_]);
633   kpg2i_.resize(localsize_[mype_]);
634   gx_.resize(3*localsize_[mype_]);
635   kpgx_.resize(3*localsize_[mype_]);
636   isort_loc.resize(localsize_[mype_]);
637 
638   update_g();
639 
640   // basis set construction is complete
641 
642   return true;
643 }
644 
645 ////////////////////////////////////////////////////////////////////////////////
update_g(void)646 void Basis::update_g(void)
647 {
648   // compute the values of g, kpg, gi, g2i, g2, kpg2, gx
649   // N.B. use the values of cell (not defcell)
650 
651   const int locsize = localsize_[mype_];
652   for ( int i = 0; i < locsize; i++ )
653   {
654     D3vector gt = idx_[3*i+0] * cell_.b(0) +
655                   idx_[3*i+1] * cell_.b(1) +
656                   idx_[3*i+2] * cell_.b(2);
657 
658     D3vector kpgt = (kpoint_.x + idx_[3*i+0]) * cell_.b(0) +
659                     (kpoint_.y + idx_[3*i+1]) * cell_.b(1) +
660                     (kpoint_.z + idx_[3*i+2]) * cell_.b(2);
661 
662     gx_[i] = gt.x;
663     gx_[locsize+i] = gt.y;
664     gx_[locsize+locsize+i] = gt.z;
665     kpgx_[i] = kpgt.x;
666     kpgx_[locsize+i] = kpgt.y;
667     kpgx_[locsize+locsize+i] = kpgt.z;
668 
669     g2_[i] = norm2(gt);
670     g_[i] = sqrt( g2_[i] );
671 
672     kpg2_[i] = norm2(kpgt);
673     kpg_[i] = sqrt( kpg2_[i] );
674 
675     gi_[i] = g_[i] > 0.0 ? 1.0 / g_[i] : 0.0;
676     kpgi_[i] = kpg_[i] > 0.0 ? 1.0 / kpg_[i] : 0.0;
677     g2i_[i] = gi_[i] * gi_[i];
678     kpg2i_[i] = kpgi_[i] * kpgi_[i];
679     isort_loc[i] = i;
680   }
681 
682   VectorLess<double> g2_less(g2_);
683   sort(isort_loc.begin(), isort_loc.end(), g2_less);
684 #if DEBUG
685   for ( int i = 0; i < locsize; i++ )
686   {
687     cout << mype_ << " sorted " << i << " " << g2_[isort_loc[i]] << endl;
688   }
689 #endif
690 }
691 
692 ////////////////////////////////////////////////////////////////////////////////
print(ostream & os)693 void Basis::print(ostream& os)
694 {
695   os << mype_ << ": ";
696   os << " Basis.kpoint():   " << kpoint() << endl;
697   os << mype_ << ": ";
698   os << " Basis.kpoint():   " << kpoint().x << " * b0 + "
699                               << kpoint().y << " * b1 + "
700                               << kpoint().z << " * b2" << endl;
701   os << mype_ << ": ";
702   os << " Basis.kpoint():   " << kpoint().x * cell().b(0) +
703                                  kpoint().y * cell().b(1) +
704                                  kpoint().z * cell().b(2) << endl;
705   os << mype_ << ": ";
706   os << " Basis.cell():     " << endl << cell() << endl;
707   os << mype_ << ": ";
708   os << " Basis.ref cell(): " << endl << refcell() << endl;
709   os << mype_ << ": ";
710   os << " Basis.ecut():     " << ecut() << endl;
711   os << mype_ << ": ";
712   os << " Basis.np(0,1,2):  " << np(0) << " "
713        << np(1) << " " << np(2) << endl;
714   os << mype_ << ": ";
715   os << " Basis.idxmin:       " << idxmin(0) << " "
716        << idxmin(1) << " " << idxmin(2) << endl;
717   os << mype_ << ": ";
718   os << " Basis.idxmax:       " << idxmax(0) << " "
719        << idxmax(1) << " " << idxmax(2) << endl;
720   os << mype_ << ": ";
721   os << " Basis.size():     " << size() << endl;
722   os << mype_ << ": ";
723   os << " Basis.localsize(): " << localsize() << endl;
724   os << mype_ << ": ";
725   os << " Basis.nrods():    " << nrods() << endl;
726   os << mype_ << ": ";
727   os << " Basis.real():     " << real() << endl;
728   os << mype_ << ": ";
729   os << " Basis total mem size (MB): " << memsize() / 1048576 << endl;
730   os << mype_ << ": ";
731   os << " Basis local mem size (MB): " << localmemsize() / 1048576 << endl;
732 
733   os << mype_ << ": ";
734   os << "   ig      i   j   k        gx      gy      gz       |k+g|^2"
735      << endl;
736   os << mype_ << ": ";
737   os << "   --      -   -   -        --      --      --       -------"
738      << endl;
739   for ( int i = 0; i < localsize(); i++ )
740   {
741     os << mype_ << ": ";
742     os << setw(5) << i << "   "
743        << setw(4) << idx(3*i)
744        << setw(4) << idx(3*i+1)
745        << setw(4) << idx(3*i+2)
746        << "    "
747        << setw(8) << setprecision(4) << gx(i)
748        << setw(8) << setprecision(4) << gx(i+localsize())
749        << setw(8) << setprecision(4) << gx(i+2*localsize())
750        << setw(12) << setprecision(4) << 0.5 * kpg2(i)
751        << endl;
752   }
753 }
754 
755 ////////////////////////////////////////////////////////////////////////////////
operator <<(ostream & os,Basis & b)756 ostream& operator<<(ostream& os, Basis& b)
757 {
758   b.print(os);
759   return os;
760 }
761