1 //
2 // compute_ikjy.cc
3 //
4 // Copyright (C) 2004 Edward Valeev
5 //
6 // Author: Edward Valeev <edward.valeev@chemistry.gatech.edu>
7 // Maintainer: EV
8 //
9 // This file is part of the SC Toolkit.
10 //
11 // The SC Toolkit is free software; you can redistribute it and/or modify
12 // it under the terms of the GNU Library General Public License as published by
13 // the Free Software Foundation; either version 2, or (at your option)
14 // any later version.
15 //
16 // The SC Toolkit is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU Library General Public License for more details.
20 //
21 // You should have received a copy of the GNU Library General Public License
22 // along with the SC Toolkit; see the file COPYING.LIB.  If not, write to
23 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24 //
25 // The U.S. Government is granted a limited license as per AL 91-7.
26 //
27 
28 #include <stdexcept>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32 #include <limits.h>
33 
34 #include <scconfig.h>
35 #include <util/misc/formio.h>
36 #include <util/misc/timer.h>
37 #include <util/class/class.h>
38 #include <util/state/state.h>
39 #include <util/state/state_text.h>
40 #include <util/state/state_bin.h>
41 #include <math/scmat/matrix.h>
42 #include <chemistry/qc/mbpt/bzerofast.h>
43 #include <chemistry/qc/mbptr12/transform_ikjy.h>
44 #include <chemistry/qc/mbptr12/blas.h>
45 #include <chemistry/qc/mbptr12/transform_123inds.h>
46 
47 using namespace std;
48 using namespace sc;
49 
50 #define SINGLE_THREAD_E123   0
51 #define PRINT3Q 0
52 #define PRINT4Q 0
53 #define PRINT_NUM_TE_TYPES 1
54 #define CHECK_INTS_SYMM 1
55 
56 /*-------------------------------------
57   Based on MBPT2::compute_mp2_energy()
58  -------------------------------------*/
59 void
compute()60 TwoBodyMOIntsTransform_ikjy::compute()
61 {
62   init_acc();
63   if (ints_acc_->is_committed())
64     return;
65 
66   Ref<Integral> integral = factory_->integral();
67   Ref<GaussianBasisSet> bs1 = space1_->basis();
68   Ref<GaussianBasisSet> bs2 = space2_->basis();
69   Ref<GaussianBasisSet> bs3 = space3_->basis();
70   Ref<GaussianBasisSet> bs4 = space4_->basis();
71   int rank1 = space1_->rank();
72   int rank2 = space2_->rank();
73   int rank3 = space3_->rank();
74   int rank4 = space4_->rank();
75   int nbasis1 = bs1->nbasis();
76   int nbasis2 = bs2->nbasis();
77   int nbasis3 = bs3->nbasis();
78   int nbasis4 = bs4->nbasis();
79   int nshell1 = bs1->nshell();
80   int nshell2 = bs2->nshell();
81   int nshell3 = bs3->nshell();
82   int nshell4 = bs4->nshell();
83   int nfuncmax1 = bs1->max_nfunction_in_shell();
84   int nfuncmax2 = bs2->max_nfunction_in_shell();
85   int nfuncmax3 = bs3->max_nfunction_in_shell();
86   int nfuncmax4 = bs4->max_nfunction_in_shell();
87   enum te_types {eri=0, r12=1, r12t1=2};
88   const size_t memgrp_blocksize = memgrp_blksize();
89 
90   // log2 of the erep tolerance
91   // (erep < 2^tol => discard)
92   const int tol = (int) (-10.0/log10(2.0));  // discard ints smaller than 10^-20
93 
94   int aoint_computed = 0;
95 
96   std::string tim_label("tbint_tform_");
97   tim_label += type(); tim_label += " "; tim_label += name();
98   tim_enter(tim_label.c_str());
99 
100   print_header();
101 
102   // Compute the storage remaining for the integral routines
103   size_t dyn_mem = distsize_to_size(compute_transform_dynamic_memory_(batchsize_));
104 
105   int me = msg_->me();
106   int nproc = msg_->n();
107   const int restart_orb = restart_orbital();
108   int nijmax = compute_nij(batchsize_,rank3,nproc,me);
109 
110   vector<int> mosym1 = space1_->mosym();
111   vector<int> mosym2 = space2_->mosym();
112   vector<int> mosym3 = space3_->mosym();
113   vector<int> mosym4 = space4_->mosym();
114   double** vector4 = new double*[nbasis4];
115   vector4[0] = new double[rank4*nbasis4];
116   for(int i=1; i<nbasis4; i++) vector4[i] = vector4[i-1] + rank4;
117   space4_->coefs().convert(vector4);
118 
119   /////////////////////////////////////
120   //  Begin transformation loops
121   /////////////////////////////////////
122 
123   // debug print
124   if (debug_ >= 2) {
125     ExEnv::outn() << indent
126 		  << scprintf("node %i, begin loop over i-batches",me) << endl;
127   }
128   // end of debug print
129 
130   // Initialize the integrals
131   integral->set_storage(memory_ - dyn_mem);
132   integral->set_basis(space1_->basis(),space2_->basis(),space3_->basis(),space4_->basis());
133   Ref<TwoBodyInt>* tbints = new Ref<TwoBodyInt>[thr_->nthread()];
134   for (int i=0; i<thr_->nthread(); i++) {
135     tbints[i] = integral->grt();
136   }
137   if (debug_ >= 1)
138     ExEnv::out0() << indent << scprintf("Memory used for integral storage:       %i Bytes",
139       integral->storage_used()) << endl;
140 
141   Ref<ThreadLock> lock = thr_->new_lock();
142   TwoBodyMOIntsTransform_123Inds** e123thread = new TwoBodyMOIntsTransform_123Inds*[thr_->nthread()];
143   for (int i=0; i<thr_->nthread(); i++) {
144     e123thread[i] = new TwoBodyMOIntsTransform_123Inds(this,i,thr_->nthread(),lock,tbints[i],-100.0,debug_);
145   }
146 
147   /*-----------------------------------
148 
149     Start the integrals transformation
150 
151    -----------------------------------*/
152   tim_enter("mp2-r12/a passes");
153   if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint() && ints_acc_->can_restart()) {
154     StateOutBin stateout(top_mole_->checkpoint_file());
155     SavableState::save_state(top_mole_,stateout);
156     ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
157   }
158 
159   for (int pass=0; pass<npass_; pass++) {
160 
161     ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
162 
163     int ni = batchsize_;
164     int i_offset = restart_orb + pass*ni;
165     if (pass == npass_ - 1)
166       ni = rank1 - batchsize_*(npass_-1);
167 
168     // Compute number of of i,j pairs on each node during current pass for
169     // two-el integrals
170     int nij = compute_nij(ni,rank3,nproc,me);;
171 
172     // debug print
173     if (debug_)
174       ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
175     // end of debug print
176 
177     // Allocate and initialize some arrays
178     // (done here to avoid having these arrays
179     // overlap with arrays allocated later)
180 
181     // Allocate (and initialize) some arrays
182 
183     double* integral_ijsx = (double*) mem_->localdata();
184     //bzerofast(integral_ijsx, (num_te_types_*nij*memgrp_blocksize));
185     memset(integral_ijsx, 0, num_te_types_*nij*memgrp_blocksize);
186     integral_ijsx = 0;
187     mem_->sync();
188     ExEnv::out0() << indent
189 		  << scprintf("Begin loop over shells (ints, 1+2+3 q.t.)") << endl;
190 
191     // Do the two electron integrals and the first three quarter transformations
192     tim_enter("ints+1qt+2qt+3qt");
193     shell_pair_data()->init();
194     for (int i=0; i<thr_->nthread(); i++) {
195       e123thread[i]->set_i_offset(i_offset);
196       e123thread[i]->set_ni(ni);
197       thr_->add_thread(i,e123thread[i]);
198 #     if SINGLE_THREAD_E123
199       e123thread[i]->run();
200 #     endif
201     }
202 #   if !SINGLE_THREAD_E123
203     thr_->start_threads();
204     thr_->wait_threads();
205 #   endif
206     tim_exit("ints+1qt+2qt+3qt");
207     ExEnv::out0() << indent << "End of loop over shells" << endl;
208 
209     mem_->sync();  // Make sure ijsx is complete on each node before continuing
210     integral_ijsx = (double*) mem_->localdata();
211 
212 #if PRINT3Q
213     if ( me == 0 ) {
214       string filename = type() + "." + name_ + ".3q.dat";
215       ios_base::openmode mode = ios_base::trunc;
216       if (pass > 0)
217         mode = ios_base::app;
218       ofstream ints_file(filename.c_str(),mode);
219       for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
220         for (int i = 0; i<ni; i++) {
221           for (int j = 0; j<rank3; j++) {
222             int ij = i*rank3+j;
223             int ij_local = ij/nproc;
224             if (ij%nproc != me)
225               continue;
226             for (int x = 0; x<rank2; x++) {
227               const double* ijsx_ints = (const double*)((size_t)integral_ijsx + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
228               for (int s = 0; s<nbasis4; s++) {
229                 double value = ijsx_ints[s*rank2+x];
230                 ints_file << scprintf("3Q: type = %d (%d %d|%d %d) = %12.8f\n",
231                                       te_type,i+i_offset,x,j,s,value);
232               }
233             }
234           }
235         }
236       }
237       ints_file.close();
238     }
239 #endif
240 
241     // Fourth quarter transform
242     ExEnv::out0() << indent << "Begin fourth q.t." << endl;
243     tim_enter("4. q.t.");
244     // Begin fourth quarter transformation;
245     // generate (ix|jy) stored as ijxy
246 
247     double* ijxy_ints = new double[rank2*rank4];
248     const size_t xy_size = rank2*rank4*sizeof(double);
249     for (int i = 0; i<ni; i++) {
250       for (int j = 0; j<rank3; j++) {
251         int ij = i*rank3+j;
252         int ij_local = ij/nproc;
253         if (ij%nproc == me) {
254 
255           for(int te_type=0; te_type<num_te_types_; te_type++) {
256             const double *sx_ptr = (const double*) ((size_t)integral_ijsx + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
257 
258             // fourth quarter transform
259             // yx = ys * sx
260             const char notransp = 'n';
261             const char transp = 't';
262             const double one = 1.0;
263             const double zero = 0.0;
264             F77_DGEMM(&notransp,&transp,&rank4,&rank2,&nbasis4,&one,vector4[0],&rank4,
265                       sx_ptr,&rank2,&zero,ijxy_ints,&rank4);
266 
267             // copy the result back to integrals_ijsx
268             memcpy((void*)sx_ptr,(const void*)ijxy_ints,xy_size);
269           }
270         }
271       }
272     }
273     delete[] ijxy_ints;
274     tim_exit("4. q.t.");
275     ExEnv::out0() << indent << "End of fourth q.t." << endl;
276 
277     integral_ijsx = 0;
278     double* integral_ijxy = (double*) mem_->localdata();
279 
280     // Zero out nonsymmetric integrals -- Pitzer theorem in action
281     {
282       for (int i = 0; i<ni; i++) {
283         for (int j = 0; j<rank3; j++) {
284           int ij = i*rank3+j;
285           int ij_local = ij/nproc;
286           if (ij%nproc == me) {
287             const int ij_sym = mosym1[i+i_offset] ^ mosym3[j];
288             for(int te_type=0; te_type<num_te_types_; te_type++) {
289               double* ijxy_ptr = (double *)((size_t)integral_ijxy + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
290               for (int x = 0; x<rank2; x++) {
291                 const int ijx_sym = ij_sym ^ mosym2[x];
292                 for (int y = 0; y<rank4; y++, ijxy_ptr++) {
293                   if (ijx_sym ^ mosym4[y]) {
294                     *ijxy_ptr = 0.0;
295                   }
296                 }
297               }
298             }
299           }
300         }
301       }
302     }
303     // Sync up tasks before integrals are committed
304     mem_->sync();
305 
306 #if PRINT4Q
307     if ( me == 0 ) {
308       string filename = type() + "." + name_ + ".4q.dat";
309       ios_base::openmode mode = ios_base::trunc;
310       if (pass > 0)
311         mode = ios_base::app;
312       ofstream ints_file(filename.c_str(),mode);
313       for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
314         for (int i = 0; i<ni; i++) {
315           for (int j = 0; j<rank3; j++) {
316             int ij = i*rank3+j;
317             int ij_local = ij/nproc;
318             if (ij%nproc != me)
319               continue;
320             for (int x = 0; x<rank2; x++) {
321               const double* ijxy_ints = (const double*)((size_t)integral_ijxy + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
322               for (int y = 0; y<rank4; y++) {
323                 double value = ijxy_ints[x*rank4+y];
324                 ints_file << scprintf("4Q: type = %d (%d %d|%d %d) = %12.8f\n",
325                                       te_type,i+i_offset,x,j,y,value);
326               }
327             }
328           }
329         }
330       }
331       ints_file.close();
332     }
333 #endif
334 
335     // Push locally stored integrals to an accumulator
336     // This could involve storing the data to disk or simply remembering the pointer
337     tim_enter("MO ints store");
338     ints_acc_->store_memorygrp(mem_,ni,memgrp_blocksize);
339     tim_exit("MO ints store");
340     mem_->sync();
341 
342     if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint() && ints_acc_->can_restart()) {
343       StateOutBin stateout(top_mole_->checkpoint_file());
344       SavableState::save_state(top_mole_,stateout);
345       ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
346     }
347 
348   } // end of loop over passes
349   tim_exit("mp2-r12/a passes");
350   if (debug_)
351     ExEnv::out0() << indent << "End of mp2-r12/a transformation" << endl;
352   // Done storing integrals - commit the content
353   // WARNING: it is not safe to use mem until deactivate has been called on the accumulator
354   //          After that deactivate the size of mem will be 0 [mem->set_localsize(0)]
355   ints_acc_->commit();
356 
357 
358   for (int i=0; i<thr_->nthread(); i++) {
359     delete e123thread[i];
360   }
361   delete[] e123thread;
362   delete[] tbints; tbints = 0;
363   delete[] vector4[0]; delete[] vector4;
364 
365   tim_exit(tim_label.c_str());
366 
367   if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint()) {
368     StateOutBin stateout(top_mole_->checkpoint_file());
369     SavableState::save_state(top_mole_,stateout);
370     ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
371   }
372 
373   print_footer();
374 
375 #if CHECK_INTS_SYMM
376   ExEnv::out0() << indent << "Detecting non-totally-symmetric integrals ... ";
377   check_int_symm();
378   ExEnv::out0() << "none" << endl;
379 #endif
380 
381 }
382 
383 
384 ////////////////////////////////////////////////////////////////////////////
385 
386 // Local Variables:
387 // mode: c++
388 // c-file-style: "CLJ-CONDENSED"
389 // End:
390