1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: angmombatch.cc
4 // Copyright (C) 2015 Toru Shiozaki
5 //
6 // Author: Ryan D. Reynolds <RyanDReynolds@u.northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
23 //
24 
25 
26 #include <src/integral/carsphlist.h>
27 #include <src/integral/os/angmombatch.h>
28 
29 using namespace std;
30 using namespace bagel;
31 
32 const static CarSphList carsphlist;
33 
34 
compute()35 void AngMomBatch::compute() {
36 
37   const SortList sort_(spherical_);
38 
39   double* const intermediate_p = stack_->get<double>(size_block_*3);
40   fill_n(intermediate_p, size_block_*3, 0.0);
41   perform_VRR(intermediate_p);
42 
43   for (int i = 0; i != 3; ++i) {
44     double* const cdata = data_ + i*size_block_;
45     const double* const csource = intermediate_p + i*size_block_;
46     double* const intermediate_c = stack_->get<double>(cont0_ * cont1_ * asize_intermediate_);
47     fill_n(intermediate_c, cont0_ * cont1_ * asize_intermediate_, 0.0);
48     perform_contraction(asize_intermediate_, csource, prim0_, prim1_, intermediate_c,
49                         basisinfo_[0]->contractions(), basisinfo_[0]->contraction_ranges(), cont0_,
50                         basisinfo_[1]->contractions(), basisinfo_[1]->contraction_ranges(), cont1_);
51 
52     if (basisinfo_[0]->spherical() && basisinfo_[1]->spherical()) {
53       // transform both indices to spherical
54       double* const intermediate_i = stack_->get<double>(cont0_ * cont1_ * asize_final_);
55       fill_n(intermediate_i, cont0_ * cont1_ * asize_final_, 0.0);
56       const unsigned int carsph_index = basisinfo_[0]->angular_number() * ANG_HRR_END + basisinfo_[1]->angular_number();
57       const int nloops = cont0_ * cont1_;
58       carsphlist.carsphfunc_call(carsph_index, nloops, intermediate_c, intermediate_i);
59 
60       const unsigned int sort_index = basisinfo_[1]->angular_number() * ANG_HRR_END + basisinfo_[0]->angular_number();
61       sort_.sortfunc_call(sort_index, cdata, intermediate_i, cont1_, cont0_, 1, swap01_);
62       stack_->release(cont0_ * cont1_ * asize_final_, intermediate_i);
63     } else {
64       const unsigned int sort_index = basisinfo_[1]->angular_number() * ANG_HRR_END + basisinfo_[0]->angular_number();
65       sort_.sortfunc_call(sort_index, cdata, intermediate_c, cont1_, cont0_, 1, swap01_);
66     }
67 
68     stack_->release(cont0_ * cont1_ * asize_intermediate_, intermediate_c);
69   }
70   stack_->release(size_block_*3, intermediate_p);
71 
72 }
73 
74 
perform_VRR(double * intermediate)75 void AngMomBatch::perform_VRR(double* intermediate) {
76   const int amax3 = amax1_+2;
77   const int worksize = amax3;
78   double* worksx = stack_->get<double>(worksize * worksize);
79   double* worksy = stack_->get<double>(worksize * worksize);
80   double* worksz = stack_->get<double>(worksize * worksize);
81 
82   const double imag = swap01_ ? -1.0 : 1.0;
83   const double Bx = basisinfo_[1]->position(0);
84   const double By = basisinfo_[1]->position(1);
85   const double Bz = basisinfo_[1]->position(2);
86 
87   for (int ii = 0; ii != prim0_ * prim1_; ++ii) {
88     // Perform VRR
89     int offset_ii = ii * asize_intermediate_;
90     const double cop = 1.0 / xp_[ii];
91     const double cb = xb_[ii];
92     const double cxpa = P_[ii * 3    ] - basisinfo_[0]->position(0);
93     const double cypa = P_[ii * 3 + 1] - basisinfo_[0]->position(1);
94     const double czpa = P_[ii * 3 + 2] - basisinfo_[0]->position(2);
95     double* current_data = &intermediate[offset_ii];
96 
97     // obtain S(0, 0)
98     worksx[0] = coeffsx_[ii];
99     worksy[0] = coeffsy_[ii];
100     worksz[0] = coeffsz_[ii];
101 
102     // obtain S(1, 0)
103     worksx[1] = cxpa * worksx[0];
104     worksy[1] = cypa * worksy[0];
105     worksz[1] = czpa * worksz[0];
106 
107     for (int i = 2; i != amax3; ++i) {
108       // obtain S(i, 0)
109       worksx[i] = cxpa * worksx[i-1] + 0.5 * (i-1) * cop * worksx[i-2];
110       worksy[i] = cypa * worksy[i-1] + 0.5 * (i-1) * cop * worksy[i-2];
111       worksz[i] = czpa * worksz[i-1] + 0.5 * (i-1) * cop * worksz[i-2];
112     }
113 
114     // peform HRR
115     {
116       const int j = 1;
117       // obtain S(0, 1)
118       worksx[j * amax3] = AB_[0] * worksx[(j-1) * amax3] + worksx[(j-1) * amax3 + 1];
119       worksy[j * amax3] = AB_[1] * worksy[(j-1) * amax3] + worksy[(j-1) * amax3 + 1];
120       worksz[j * amax3] = AB_[2] * worksz[(j-1) * amax3] + worksz[(j-1) * amax3 + 1];
121 
122       for (int i = 1; i != amax3 - j; ++i) {
123         // obtain S(i, 1)
124         worksx[j * amax3 + i] = AB_[0] * worksx[(j-1) * amax3 + i] + worksx[(j-1) * amax3 + i + 1];
125         worksy[j * amax3 + i] = AB_[1] * worksy[(j-1) * amax3 + i] + worksy[(j-1) * amax3 + i + 1];
126         worksz[j * amax3 + i] = AB_[2] * worksz[(j-1) * amax3 + i] + worksz[(j-1) * amax3 + i + 1];
127       }
128     }
129     for (int j = 2; j <= ang1_ + 2; ++j) {
130       // obtain S(0, j)
131       worksx[j * amax3] = AB_[0] * worksx[(j-1) * amax3] + worksx[(j-1) * amax3 + 1];
132       worksy[j * amax3] = AB_[1] * worksy[(j-1) * amax3] + worksy[(j-1) * amax3 + 1];
133       worksz[j * amax3] = AB_[2] * worksz[(j-1) * amax3] + worksz[(j-1) * amax3 + 1];
134 
135       for (int i = 1; i != amax3 - j; ++i) {
136         // obtain S(i, j)
137         worksx[j * amax3 + i] = AB_[0] * worksx[(j-1) * amax3 + i] + worksx[(j-1) * amax3 + i + 1];
138         worksy[j * amax3 + i] = AB_[1] * worksy[(j-1) * amax3 + i] + worksy[(j-1) * amax3 + i + 1];
139         worksz[j * amax3 + i] = AB_[2] * worksz[(j-1) * amax3 + i] + worksz[(j-1) * amax3 + i + 1];
140       }
141     }
142 
143     double Sx, Sy, Sz;    // operator = 1
144     double ox, oy, oz;    // operator = x
145     double dx, dy, dz;    // operator = d/dx
146 
147     // now we obtain the output
148     int cnt = 0;
149     for (int iz = 0; iz <= ang0_; ++iz) {
150       for (int iy = 0; iy <= ang0_ - iz; ++iy) {
151         const int ix = ang0_ - iy - iz;
152         if (ix >= 0) {
153           for (int jz = 0; jz <= ang1_; ++jz) {
154             for (int jy = 0; jy <= ang1_ - jz; ++jy) {
155               const int jx = ang1_ - jy - jz;
156               if (jx >= 0) {
157 
158                 const double jxd = jx;
159                 const double jyd = jy;
160                 const double jzd = jz;
161                 double minus1x = 0.0;
162                 double minus1y = 0.0;
163                 double minus1z = 0.0;
164                 if (jx > 0) minus1x = jxd * worksx[ix + amax3 * (jx-1)];
165                 if (jy > 0) minus1y = jyd * worksy[iy + amax3 * (jy-1)];
166                 if (jz > 0) minus1z = jzd * worksz[iz + amax3 * (jz-1)];
167 
168                 Sx = worksx[ix + amax3 * jx];
169                 Sy = worksy[iy + amax3 * jy];
170                 Sz = worksz[iz + amax3 * jz];
171 
172                 ox = worksx[ix + amax3 * (jx+1)] + Bx * worksx[ix + amax3 * jx];
173                 oy = worksy[iy + amax3 * (jy+1)] + By * worksy[iy + amax3 * jy];
174                 oz = worksz[iz + amax3 * (jz+1)] + Bz * worksz[iz + amax3 * jz];
175 
176                 dx = minus1x - 2.0 * cb * worksx[ix+amax3 * (jx+1)];
177                 dy = minus1y - 2.0 * cb * worksy[iy+amax3 * (jy+1)];
178                 dz = minus1z - 2.0 * cb * worksz[iz+amax3 * (jz+1)];
179 
180                 current_data[cnt              ] = -imag * mcoord_[2] * Sx * dy * Sz;
181                 current_data[cnt+size_block_  ] = -imag * mcoord_[0] * Sx * Sy * dz;
182                 current_data[cnt+size_block_*2] = -imag * mcoord_[1] * dx * Sy * Sz;
183 
184                 current_data[cnt              ] += imag * mcoord_[1] * Sx * Sy * dz;
185                 current_data[cnt+size_block_  ] += imag * mcoord_[2] * dx * Sy * Sz;
186                 current_data[cnt+size_block_*2] += imag * mcoord_[0] * Sx * dy * Sz;
187 
188                 current_data[cnt              ] -= imag * Sx * oy * dz;
189                 current_data[cnt+size_block_  ] -= imag * dx * Sy * oz;
190                 current_data[cnt+size_block_*2] -= imag * ox * dy * Sz;
191 
192                 current_data[cnt              ] += imag * Sx * dy * oz;
193                 current_data[cnt+size_block_  ] += imag * ox * Sy * dz;
194                 current_data[cnt+size_block_*2] += imag * dx * oy * Sz;
195 
196                 ++cnt;
197               }
198             }
199           }
200         }
201       }
202     }
203     assert(cnt == asize_intermediate_);
204 
205   } // end of prim exponent loop
206 
207   stack_->release(worksize * worksize, worksz);
208   stack_->release(worksize * worksize, worksy);
209   stack_->release(worksize * worksize, worksx);
210 }
211 
212