1 //
2 // compute_ikjy.cc
3 //
4 // Copyright (C) 2004 Edward Valeev
5 //
6 // Author: Edward Valeev <edward.valeev@chemistry.gatech.edu>
7 // Maintainer: EV
8 //
9 // This file is part of the SC Toolkit.
10 //
11 // The SC Toolkit is free software; you can redistribute it and/or modify
12 // it under the terms of the GNU Library General Public License as published by
13 // the Free Software Foundation; either version 2, or (at your option)
14 // any later version.
15 //
16 // The SC Toolkit is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU Library General Public License for more details.
20 //
21 // You should have received a copy of the GNU Library General Public License
22 // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
23 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
24 //
25 // The U.S. Government is granted a limited license as per AL 91-7.
26 //
27
28 #include <stdexcept>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <math.h>
32 #include <limits.h>
33
34 #include <scconfig.h>
35 #include <util/misc/formio.h>
36 #include <util/misc/timer.h>
37 #include <util/class/class.h>
38 #include <util/state/state.h>
39 #include <util/state/state_text.h>
40 #include <util/state/state_bin.h>
41 #include <math/scmat/matrix.h>
42 #include <chemistry/qc/mbpt/bzerofast.h>
43 #include <chemistry/qc/mbptr12/transform_ikjy.h>
44 #include <chemistry/qc/mbptr12/blas.h>
45 #include <chemistry/qc/mbptr12/transform_123inds.h>
46
47 using namespace std;
48 using namespace sc;
49
50 #define SINGLE_THREAD_E123 0
51 #define PRINT3Q 0
52 #define PRINT4Q 0
53 #define PRINT_NUM_TE_TYPES 1
54 #define CHECK_INTS_SYMM 1
55
56 /*-------------------------------------
57 Based on MBPT2::compute_mp2_energy()
58 -------------------------------------*/
59 void
compute()60 TwoBodyMOIntsTransform_ikjy::compute()
61 {
62 init_acc();
63 if (ints_acc_->is_committed())
64 return;
65
66 Ref<Integral> integral = factory_->integral();
67 Ref<GaussianBasisSet> bs1 = space1_->basis();
68 Ref<GaussianBasisSet> bs2 = space2_->basis();
69 Ref<GaussianBasisSet> bs3 = space3_->basis();
70 Ref<GaussianBasisSet> bs4 = space4_->basis();
71 int rank1 = space1_->rank();
72 int rank2 = space2_->rank();
73 int rank3 = space3_->rank();
74 int rank4 = space4_->rank();
75 int nbasis1 = bs1->nbasis();
76 int nbasis2 = bs2->nbasis();
77 int nbasis3 = bs3->nbasis();
78 int nbasis4 = bs4->nbasis();
79 int nshell1 = bs1->nshell();
80 int nshell2 = bs2->nshell();
81 int nshell3 = bs3->nshell();
82 int nshell4 = bs4->nshell();
83 int nfuncmax1 = bs1->max_nfunction_in_shell();
84 int nfuncmax2 = bs2->max_nfunction_in_shell();
85 int nfuncmax3 = bs3->max_nfunction_in_shell();
86 int nfuncmax4 = bs4->max_nfunction_in_shell();
87 enum te_types {eri=0, r12=1, r12t1=2};
88 const size_t memgrp_blocksize = memgrp_blksize();
89
90 // log2 of the erep tolerance
91 // (erep < 2^tol => discard)
92 const int tol = (int) (-10.0/log10(2.0)); // discard ints smaller than 10^-20
93
94 int aoint_computed = 0;
95
96 std::string tim_label("tbint_tform_");
97 tim_label += type(); tim_label += " "; tim_label += name();
98 tim_enter(tim_label.c_str());
99
100 print_header();
101
102 // Compute the storage remaining for the integral routines
103 size_t dyn_mem = distsize_to_size(compute_transform_dynamic_memory_(batchsize_));
104
105 int me = msg_->me();
106 int nproc = msg_->n();
107 const int restart_orb = restart_orbital();
108 int nijmax = compute_nij(batchsize_,rank3,nproc,me);
109
110 vector<int> mosym1 = space1_->mosym();
111 vector<int> mosym2 = space2_->mosym();
112 vector<int> mosym3 = space3_->mosym();
113 vector<int> mosym4 = space4_->mosym();
114 double** vector4 = new double*[nbasis4];
115 vector4[0] = new double[rank4*nbasis4];
116 for(int i=1; i<nbasis4; i++) vector4[i] = vector4[i-1] + rank4;
117 space4_->coefs().convert(vector4);
118
119 /////////////////////////////////////
120 // Begin transformation loops
121 /////////////////////////////////////
122
123 // debug print
124 if (debug_ >= 2) {
125 ExEnv::outn() << indent
126 << scprintf("node %i, begin loop over i-batches",me) << endl;
127 }
128 // end of debug print
129
130 // Initialize the integrals
131 integral->set_storage(memory_ - dyn_mem);
132 integral->set_basis(space1_->basis(),space2_->basis(),space3_->basis(),space4_->basis());
133 Ref<TwoBodyInt>* tbints = new Ref<TwoBodyInt>[thr_->nthread()];
134 for (int i=0; i<thr_->nthread(); i++) {
135 tbints[i] = integral->grt();
136 }
137 if (debug_ >= 1)
138 ExEnv::out0() << indent << scprintf("Memory used for integral storage: %i Bytes",
139 integral->storage_used()) << endl;
140
141 Ref<ThreadLock> lock = thr_->new_lock();
142 TwoBodyMOIntsTransform_123Inds** e123thread = new TwoBodyMOIntsTransform_123Inds*[thr_->nthread()];
143 for (int i=0; i<thr_->nthread(); i++) {
144 e123thread[i] = new TwoBodyMOIntsTransform_123Inds(this,i,thr_->nthread(),lock,tbints[i],-100.0,debug_);
145 }
146
147 /*-----------------------------------
148
149 Start the integrals transformation
150
151 -----------------------------------*/
152 tim_enter("mp2-r12/a passes");
153 if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint() && ints_acc_->can_restart()) {
154 StateOutBin stateout(top_mole_->checkpoint_file());
155 SavableState::save_state(top_mole_,stateout);
156 ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
157 }
158
159 for (int pass=0; pass<npass_; pass++) {
160
161 ExEnv::out0() << indent << "Beginning pass " << pass+1 << endl;
162
163 int ni = batchsize_;
164 int i_offset = restart_orb + pass*ni;
165 if (pass == npass_ - 1)
166 ni = rank1 - batchsize_*(npass_-1);
167
168 // Compute number of of i,j pairs on each node during current pass for
169 // two-el integrals
170 int nij = compute_nij(ni,rank3,nproc,me);;
171
172 // debug print
173 if (debug_)
174 ExEnv::outn() << indent << "node " << me << ", nij = " << nij << endl;
175 // end of debug print
176
177 // Allocate and initialize some arrays
178 // (done here to avoid having these arrays
179 // overlap with arrays allocated later)
180
181 // Allocate (and initialize) some arrays
182
183 double* integral_ijsx = (double*) mem_->localdata();
184 //bzerofast(integral_ijsx, (num_te_types_*nij*memgrp_blocksize));
185 memset(integral_ijsx, 0, num_te_types_*nij*memgrp_blocksize);
186 integral_ijsx = 0;
187 mem_->sync();
188 ExEnv::out0() << indent
189 << scprintf("Begin loop over shells (ints, 1+2+3 q.t.)") << endl;
190
191 // Do the two electron integrals and the first three quarter transformations
192 tim_enter("ints+1qt+2qt+3qt");
193 shell_pair_data()->init();
194 for (int i=0; i<thr_->nthread(); i++) {
195 e123thread[i]->set_i_offset(i_offset);
196 e123thread[i]->set_ni(ni);
197 thr_->add_thread(i,e123thread[i]);
198 # if SINGLE_THREAD_E123
199 e123thread[i]->run();
200 # endif
201 }
202 # if !SINGLE_THREAD_E123
203 thr_->start_threads();
204 thr_->wait_threads();
205 # endif
206 tim_exit("ints+1qt+2qt+3qt");
207 ExEnv::out0() << indent << "End of loop over shells" << endl;
208
209 mem_->sync(); // Make sure ijsx is complete on each node before continuing
210 integral_ijsx = (double*) mem_->localdata();
211
212 #if PRINT3Q
213 if ( me == 0 ) {
214 string filename = type() + "." + name_ + ".3q.dat";
215 ios_base::openmode mode = ios_base::trunc;
216 if (pass > 0)
217 mode = ios_base::app;
218 ofstream ints_file(filename.c_str(),mode);
219 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
220 for (int i = 0; i<ni; i++) {
221 for (int j = 0; j<rank3; j++) {
222 int ij = i*rank3+j;
223 int ij_local = ij/nproc;
224 if (ij%nproc != me)
225 continue;
226 for (int x = 0; x<rank2; x++) {
227 const double* ijsx_ints = (const double*)((size_t)integral_ijsx + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
228 for (int s = 0; s<nbasis4; s++) {
229 double value = ijsx_ints[s*rank2+x];
230 ints_file << scprintf("3Q: type = %d (%d %d|%d %d) = %12.8f\n",
231 te_type,i+i_offset,x,j,s,value);
232 }
233 }
234 }
235 }
236 }
237 ints_file.close();
238 }
239 #endif
240
241 // Fourth quarter transform
242 ExEnv::out0() << indent << "Begin fourth q.t." << endl;
243 tim_enter("4. q.t.");
244 // Begin fourth quarter transformation;
245 // generate (ix|jy) stored as ijxy
246
247 double* ijxy_ints = new double[rank2*rank4];
248 const size_t xy_size = rank2*rank4*sizeof(double);
249 for (int i = 0; i<ni; i++) {
250 for (int j = 0; j<rank3; j++) {
251 int ij = i*rank3+j;
252 int ij_local = ij/nproc;
253 if (ij%nproc == me) {
254
255 for(int te_type=0; te_type<num_te_types_; te_type++) {
256 const double *sx_ptr = (const double*) ((size_t)integral_ijsx + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
257
258 // fourth quarter transform
259 // yx = ys * sx
260 const char notransp = 'n';
261 const char transp = 't';
262 const double one = 1.0;
263 const double zero = 0.0;
264 F77_DGEMM(¬ransp,&transp,&rank4,&rank2,&nbasis4,&one,vector4[0],&rank4,
265 sx_ptr,&rank2,&zero,ijxy_ints,&rank4);
266
267 // copy the result back to integrals_ijsx
268 memcpy((void*)sx_ptr,(const void*)ijxy_ints,xy_size);
269 }
270 }
271 }
272 }
273 delete[] ijxy_ints;
274 tim_exit("4. q.t.");
275 ExEnv::out0() << indent << "End of fourth q.t." << endl;
276
277 integral_ijsx = 0;
278 double* integral_ijxy = (double*) mem_->localdata();
279
280 // Zero out nonsymmetric integrals -- Pitzer theorem in action
281 {
282 for (int i = 0; i<ni; i++) {
283 for (int j = 0; j<rank3; j++) {
284 int ij = i*rank3+j;
285 int ij_local = ij/nproc;
286 if (ij%nproc == me) {
287 const int ij_sym = mosym1[i+i_offset] ^ mosym3[j];
288 for(int te_type=0; te_type<num_te_types_; te_type++) {
289 double* ijxy_ptr = (double *)((size_t)integral_ijxy + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
290 for (int x = 0; x<rank2; x++) {
291 const int ijx_sym = ij_sym ^ mosym2[x];
292 for (int y = 0; y<rank4; y++, ijxy_ptr++) {
293 if (ijx_sym ^ mosym4[y]) {
294 *ijxy_ptr = 0.0;
295 }
296 }
297 }
298 }
299 }
300 }
301 }
302 }
303 // Sync up tasks before integrals are committed
304 mem_->sync();
305
306 #if PRINT4Q
307 if ( me == 0 ) {
308 string filename = type() + "." + name_ + ".4q.dat";
309 ios_base::openmode mode = ios_base::trunc;
310 if (pass > 0)
311 mode = ios_base::app;
312 ofstream ints_file(filename.c_str(),mode);
313 for(int te_type=0; te_type<PRINT_NUM_TE_TYPES; te_type++) {
314 for (int i = 0; i<ni; i++) {
315 for (int j = 0; j<rank3; j++) {
316 int ij = i*rank3+j;
317 int ij_local = ij/nproc;
318 if (ij%nproc != me)
319 continue;
320 for (int x = 0; x<rank2; x++) {
321 const double* ijxy_ints = (const double*)((size_t)integral_ijxy + (ij_local*num_te_types_+te_type)*memgrp_blocksize);
322 for (int y = 0; y<rank4; y++) {
323 double value = ijxy_ints[x*rank4+y];
324 ints_file << scprintf("4Q: type = %d (%d %d|%d %d) = %12.8f\n",
325 te_type,i+i_offset,x,j,y,value);
326 }
327 }
328 }
329 }
330 }
331 ints_file.close();
332 }
333 #endif
334
335 // Push locally stored integrals to an accumulator
336 // This could involve storing the data to disk or simply remembering the pointer
337 tim_enter("MO ints store");
338 ints_acc_->store_memorygrp(mem_,ni,memgrp_blocksize);
339 tim_exit("MO ints store");
340 mem_->sync();
341
342 if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint() && ints_acc_->can_restart()) {
343 StateOutBin stateout(top_mole_->checkpoint_file());
344 SavableState::save_state(top_mole_,stateout);
345 ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
346 }
347
348 } // end of loop over passes
349 tim_exit("mp2-r12/a passes");
350 if (debug_)
351 ExEnv::out0() << indent << "End of mp2-r12/a transformation" << endl;
352 // Done storing integrals - commit the content
353 // WARNING: it is not safe to use mem until deactivate has been called on the accumulator
354 // After that deactivate the size of mem will be 0 [mem->set_localsize(0)]
355 ints_acc_->commit();
356
357
358 for (int i=0; i<thr_->nthread(); i++) {
359 delete e123thread[i];
360 }
361 delete[] e123thread;
362 delete[] tbints; tbints = 0;
363 delete[] vector4[0]; delete[] vector4;
364
365 tim_exit(tim_label.c_str());
366
367 if (me == 0 && top_mole_.nonnull() && top_mole_->if_to_checkpoint()) {
368 StateOutBin stateout(top_mole_->checkpoint_file());
369 SavableState::save_state(top_mole_,stateout);
370 ExEnv::out0() << indent << "Checkpointed the wave function" << endl;
371 }
372
373 print_footer();
374
375 #if CHECK_INTS_SYMM
376 ExEnv::out0() << indent << "Detecting non-totally-symmetric integrals ... ";
377 check_int_symm();
378 ExEnv::out0() << "none" << endl;
379 #endif
380
381 }
382
383
384 ////////////////////////////////////////////////////////////////////////////
385
386 // Local Variables:
387 // mode: c++
388 // c-file-style: "CLJ-CONDENSED"
389 // End:
390