// // transform_123inds.cc // // Copyright (C) 2001 Edward Valeev // // Author: Edward Valeev // Maintainer: EV // // This file is part of the SC Toolkit. // // The SC Toolkit is free software; you can redistribute it and/or modify // it under the terms of the GNU Library General Public License as published by // the Free Software Foundation; either version 2, or (at your option) // any later version. // // The SC Toolkit is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU Library General Public License for more details. // // You should have received a copy of the GNU Library General Public License // along with the SC Toolkit; see the file COPYING.LIB. If not, write to // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. // // The U.S. Government is granted a limited license as per AL 91-7. // #ifdef __GNUC__ #pragma implementation #endif #include #include #include #include #include #include #include #include #include #include #include #include using namespace std; using namespace sc; #define PRINT1Q 0 #define PRINT2Q 0 #define PRINT_NUM_TE_TYPES 1 // The FAST_BUT_WRONG flags is useful for exercising the communications // layer. It causes the first and second quarter transformation to be // omitted, but all communication is still performed. This permits // problems in communications libraries to be more quickly identified. #define FAST_BUT_WRONG 0 TwoBodyMOIntsTransform_123Inds::TwoBodyMOIntsTransform_123Inds( const Ref& tform, int mythread, int nthread, const Ref& lock, const Ref &tbint, double tol, int debug) : tform_(tform), mythread_(mythread), nthread_(nthread), lock_(lock), tbint_(tbint), tol_(tol), debug_(debug) { timer_ = new RegionTimer(); aoint_computed_ = 0; ni_ = tform_->batchsize(); i_offset_ = 0; } TwoBodyMOIntsTransform_123Inds::~TwoBodyMOIntsTransform_123Inds() { timer_ = 0; } /* Distribute work by SR for all PQ compute unique (PQ|RS) transform to (RS|IM) where M are all AOs for basis set 2 end PQ use BLAS to transform each rsIM to rsIX transform RSIX to IJXS and accumulate to the tasks that holds respective ij-pairs. end SR */ void TwoBodyMOIntsTransform_123Inds::run() { Ref mem = tform_->mem(); Ref msg = tform_->msg(); Ref ints_acc = tform_->ints_acc(); const int me = msg->me(); const int nproc = msg->n(); Ref space1 = tform_->space1(); Ref space2 = tform_->space2(); Ref space3 = tform_->space3(); Ref space4 = tform_->space4(); Ref bs1 = space1->basis(); Ref bs2 = space2->basis(); Ref bs3 = space3->basis(); Ref bs4 = space4->basis(); const bool bs1_eq_bs2 = (bs1 == bs2); const bool bs3_eq_bs4 = (bs3 == bs4); const bool dynamic = tform_->dynamic(); const double print_percent = tform_->print_percent(); const int ni = ni_; const int rank1 = space1->rank(); const int rank2 = space2->rank(); const int rank3 = space3->rank(); const int nfuncmax1 = bs1->max_nfunction_in_shell(); const int nfuncmax2 = bs2->max_nfunction_in_shell(); const int nfuncmax3 = bs3->max_nfunction_in_shell(); const int nfuncmax4 = bs4->max_nfunction_in_shell(); const int nsh1 = bs1->nshell(); const int nsh2 = bs2->nshell(); const int nsh3 = bs3->nshell(); const int nsh4 = bs4->nshell(); const int nbasis1 = bs1->nbasis(); const int nbasis2 = bs2->nbasis(); const int nbasis3 = bs3->nbasis(); const int nbasis4 = bs4->nbasis(); double dtol = pow(2.0,tol_); const size_t memgrp_blksize = tform_->memgrp_blksize()/sizeof(double); double** vector1 = new double*[nbasis1]; double** vector2 = new double*[nbasis2]; double** vector3 = new double*[nbasis3]; vector1[0] = new double[rank1*nbasis1]; vector2[0] = new double[rank2*nbasis2]; vector3[0] = new double[rank3*nbasis3]; for(int i=1; icoefs().convert(vector1); space2->coefs().convert(vector2); space3->coefs().convert(vector3); /*------------------------------------------------------------- Find integrals buffers to 1/r12, r12, and [r12,T1] integrals -------------------------------------------------------------*/ const int num_te_types = tform_->num_te_types(); const double *intbuf[TwoBodyInt::num_tbint_types]; intbuf[TwoBodyInt::eri] = tbint_->buffer(TwoBodyInt::eri); intbuf[TwoBodyInt::r12] = tbint_->buffer(TwoBodyInt::r12); intbuf[TwoBodyInt::r12t1] = tbint_->buffer(TwoBodyInt::r12t1); intbuf[TwoBodyInt::r12t2] = tbint_->buffer(TwoBodyInt::r12t2); /*----------------------------------------------------- Allocate buffers for partially transformed integrals -----------------------------------------------------*/ double *ijsx_contrib; // local contributions to integral_ijsx double *ijrx_contrib; // local contributions to integral_ijrx double **rsiq_ints = new double*[num_te_types]; // quarter-transformed integrals for each RS pair double **rsix_ints = new double*[num_te_types]; // 2 quarter-transformed integrals for each RS pair for(int te_type=0;te_typemalloc_local_double(rank2*nfuncmax4); if (bs3_eq_bs4) ijrx_contrib = mem->malloc_local_double(rank2*nfuncmax4); else ijrx_contrib = NULL; /*----------------------------- Initialize work distribution -----------------------------*/ DistShellPair shellpairs(msg,nthread_,mythread_,lock_,bs4,bs3,dynamic, tform_->shell_pair_data()); shellpairs.set_debug(debug_); if (debug_) shellpairs.set_print_percent(print_percent/10.0); else shellpairs.set_print_percent(print_percent); int work_per_thread = bs3_eq_bs4 ? ((nsh3*(nsh3+1))/2)/(nproc*nthread_) : (nsh3*nsh4)/(nproc*nthread_) ; int print_interval = work_per_thread/100; int time_interval = work_per_thread/10; int print_index = 0; if (print_interval == 0) print_interval = 1; if (time_interval == 0) time_interval = 1; if (work_per_thread == 0) work_per_thread = 1; if (debug_) { lock_->lock(); ExEnv::outn() << scprintf("%d:%d: starting get_task loop",me,mythread_) << endl; lock_->unlock(); } Ref p4list = construct_gpetite(bs1,bs2,bs3,bs4); #if FAST_BUT_WRONG for(int te_type=0;te_type= R always (see sc::DistShellPair) int nr = bs3->shell(R).nfunction(); int r_offset = bs3->shell_to_function(R); int ns = bs4->shell(S).nfunction(); int s_offset = bs4->shell_to_function(S); int nrs = nr*ns; if (debug_ > 1 && (print_index++)%print_interval == 0) { lock_->lock(); ExEnv::outn() << scprintf("%d:%d: (PQ|%d %d) %d%%", me,mythread_,R,S,(100*print_index)/work_per_thread) << endl; lock_->unlock(); } if (debug_ > 1 && (print_index)%time_interval == 0) { lock_->lock(); ExEnv::outn() << scprintf("timer for %d:%d:",me,mythread_) << endl; timer_->print(); lock_->unlock(); } #if !FAST_BUT_WRONG // Zero out 1 q.t. storage for(int te_type=0;te_typeshell(P).nfunction(); int p_offset = bs1->shell_to_function(P); int Qmax = (bs1_eq_bs2 ? P : nsh2-1); for (int Q=0; Q<=Qmax; Q++) { int nq = bs2->shell(Q).nfunction(); int q_offset = bs2->shell_to_function(Q); // check if symmetry unique and compute degeneracy int deg = p4list->in_p4(P,Q,R,S); if (deg == 0) continue; double symfac = (double) deg; if (tbint_->log2_shell_bound(P,Q,R,S) < tol_) { continue; // skip shell quartets less than tol } aoint_computed_++; timer_->enter("AO integrals"); tbint_->compute_shell(P,Q,R,S); timer_->exit("AO integrals"); timer_->enter("1. q.t."); // Begin first quarter transformation; // generate (iq|rs) for i active // if bs1_eq_bs2 then (ip|rs) are also generated // store the integrals as rsiq for(int te_type=0; te_type dtol) { double* rsiq_ptr = &rsiq_ints[te_type][q + nbasis2*(0 + ni*(bf4 + ns*bf3))]; const double* c_pi = vector1[p] + i_offset_; double* rsip_ptr; const double* c_qi; if (bs1_eq_bs2) { rsip_ptr = &rsiq_ints[te_type][p + nbasis2*(0 + ni*(bf4 + ns*bf3))]; c_qi = vector1[q] + i_offset_; } double rsiq_int_contrib = *pqrs_ptr; // multiply each integral by its symmetry degeneracy factor rsiq_int_contrib *= symfac; if (bs1_eq_bs2) { double rsip_int_contrib = rsiq_int_contrib; if (te_type == TwoBodyInt::r12t1) rsip_int_contrib = -1.0*rsiq_int_contrib; if (p == q) { for (int i=0; iexit("1. q.t."); } // exit P loop } // exit Q loop #endif // !FAST_BUT_WRONG #if PRINT1Q { if ( me == 0 ) { lock_->lock(); string filename = tform_->type() + "." + tform_->name() + ".1q.dat"; ios_base::openmode mode = ios_base::app; if (RS_count == 0) mode = ios_base::trunc; ofstream ints_file(filename.c_str(),mode); for(int te_type=0; te_typeunlock(); } } #endif const int nix = ni*rank2; const int niq = ni*nbasis2; timer_->enter("2. q.t."); // Begin second quarter transformation; // generate (ix|rs) stored as rsix for(int te_type=0; te_typeexit("2. q.t."); #if PRINT2Q { if ( me == 0 ) { lock_->lock(); string filename = tform_->type() + "." + tform_->name() + ".2q.dat"; ios_base::openmode mode = ios_base::app; if (RS_count == 0) mode = ios_base::trunc; ofstream ints_file(filename.c_str(),mode); for(int te_type=0; te_typeunlock(); } } #endif timer_->enter("3. q.t."); // Begin third quarter transformation; // generate (ix|js) stored as ijsx (also generate (ix|jr), if needed) for(int te_type=0; te_typesum_reduction_on_node(ijsx_contrib, ij_offset, ns*rank2, ij_proc); if (bs3_eq_bs4) { size_t ij_offset = (size_t)rank2*r_offset + ijsx_start; mem->sum_reduction_on_node(ijrx_contrib, ij_offset, nr*rank2, ij_proc); } } // endif j } // endif i } // endif te_type timer_->exit("3. q.t."); ++RS_count; } // exit while get_task if (debug_) { lock_->lock(); ExEnv::outn() << scprintf("%d:%d: done with get_task loop",me,mythread_) << endl; lock_->unlock(); } for(int te_type=0; te_typefree_local_double(ijsx_contrib); if (bs3_eq_bs4) mem->free_local_double(ijrx_contrib); delete[] vector1[0]; delete[] vector1; delete[] vector2[0]; delete[] vector2; delete[] vector3[0]; delete[] vector3; } //////////////////////////////////////////////////////////////////////////// // Local Variables: // mode: c++ // c-file-style: "CLJ-CONDENSED" // End: