1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: angmombatch.cc
4 // Copyright (C) 2015 Toru Shiozaki
5 //
6 // Author: Ryan D. Reynolds <RyanDReynolds@u.northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 //
24
25
26 #include <src/integral/carsphlist.h>
27 #include <src/integral/os/angmombatch.h>
28
29 using namespace std;
30 using namespace bagel;
31
32 const static CarSphList carsphlist;
33
34
compute()35 void AngMomBatch::compute() {
36
37 const SortList sort_(spherical_);
38
39 double* const intermediate_p = stack_->get<double>(size_block_*3);
40 fill_n(intermediate_p, size_block_*3, 0.0);
41 perform_VRR(intermediate_p);
42
43 for (int i = 0; i != 3; ++i) {
44 double* const cdata = data_ + i*size_block_;
45 const double* const csource = intermediate_p + i*size_block_;
46 double* const intermediate_c = stack_->get<double>(cont0_ * cont1_ * asize_intermediate_);
47 fill_n(intermediate_c, cont0_ * cont1_ * asize_intermediate_, 0.0);
48 perform_contraction(asize_intermediate_, csource, prim0_, prim1_, intermediate_c,
49 basisinfo_[0]->contractions(), basisinfo_[0]->contraction_ranges(), cont0_,
50 basisinfo_[1]->contractions(), basisinfo_[1]->contraction_ranges(), cont1_);
51
52 if (basisinfo_[0]->spherical() && basisinfo_[1]->spherical()) {
53 // transform both indices to spherical
54 double* const intermediate_i = stack_->get<double>(cont0_ * cont1_ * asize_final_);
55 fill_n(intermediate_i, cont0_ * cont1_ * asize_final_, 0.0);
56 const unsigned int carsph_index = basisinfo_[0]->angular_number() * ANG_HRR_END + basisinfo_[1]->angular_number();
57 const int nloops = cont0_ * cont1_;
58 carsphlist.carsphfunc_call(carsph_index, nloops, intermediate_c, intermediate_i);
59
60 const unsigned int sort_index = basisinfo_[1]->angular_number() * ANG_HRR_END + basisinfo_[0]->angular_number();
61 sort_.sortfunc_call(sort_index, cdata, intermediate_i, cont1_, cont0_, 1, swap01_);
62 stack_->release(cont0_ * cont1_ * asize_final_, intermediate_i);
63 } else {
64 const unsigned int sort_index = basisinfo_[1]->angular_number() * ANG_HRR_END + basisinfo_[0]->angular_number();
65 sort_.sortfunc_call(sort_index, cdata, intermediate_c, cont1_, cont0_, 1, swap01_);
66 }
67
68 stack_->release(cont0_ * cont1_ * asize_intermediate_, intermediate_c);
69 }
70 stack_->release(size_block_*3, intermediate_p);
71
72 }
73
74
perform_VRR(double * intermediate)75 void AngMomBatch::perform_VRR(double* intermediate) {
76 const int amax3 = amax1_+2;
77 const int worksize = amax3;
78 double* worksx = stack_->get<double>(worksize * worksize);
79 double* worksy = stack_->get<double>(worksize * worksize);
80 double* worksz = stack_->get<double>(worksize * worksize);
81
82 const double imag = swap01_ ? -1.0 : 1.0;
83 const double Bx = basisinfo_[1]->position(0);
84 const double By = basisinfo_[1]->position(1);
85 const double Bz = basisinfo_[1]->position(2);
86
87 for (int ii = 0; ii != prim0_ * prim1_; ++ii) {
88 // Perform VRR
89 int offset_ii = ii * asize_intermediate_;
90 const double cop = 1.0 / xp_[ii];
91 const double cb = xb_[ii];
92 const double cxpa = P_[ii * 3 ] - basisinfo_[0]->position(0);
93 const double cypa = P_[ii * 3 + 1] - basisinfo_[0]->position(1);
94 const double czpa = P_[ii * 3 + 2] - basisinfo_[0]->position(2);
95 double* current_data = &intermediate[offset_ii];
96
97 // obtain S(0, 0)
98 worksx[0] = coeffsx_[ii];
99 worksy[0] = coeffsy_[ii];
100 worksz[0] = coeffsz_[ii];
101
102 // obtain S(1, 0)
103 worksx[1] = cxpa * worksx[0];
104 worksy[1] = cypa * worksy[0];
105 worksz[1] = czpa * worksz[0];
106
107 for (int i = 2; i != amax3; ++i) {
108 // obtain S(i, 0)
109 worksx[i] = cxpa * worksx[i-1] + 0.5 * (i-1) * cop * worksx[i-2];
110 worksy[i] = cypa * worksy[i-1] + 0.5 * (i-1) * cop * worksy[i-2];
111 worksz[i] = czpa * worksz[i-1] + 0.5 * (i-1) * cop * worksz[i-2];
112 }
113
114 // peform HRR
115 {
116 const int j = 1;
117 // obtain S(0, 1)
118 worksx[j * amax3] = AB_[0] * worksx[(j-1) * amax3] + worksx[(j-1) * amax3 + 1];
119 worksy[j * amax3] = AB_[1] * worksy[(j-1) * amax3] + worksy[(j-1) * amax3 + 1];
120 worksz[j * amax3] = AB_[2] * worksz[(j-1) * amax3] + worksz[(j-1) * amax3 + 1];
121
122 for (int i = 1; i != amax3 - j; ++i) {
123 // obtain S(i, 1)
124 worksx[j * amax3 + i] = AB_[0] * worksx[(j-1) * amax3 + i] + worksx[(j-1) * amax3 + i + 1];
125 worksy[j * amax3 + i] = AB_[1] * worksy[(j-1) * amax3 + i] + worksy[(j-1) * amax3 + i + 1];
126 worksz[j * amax3 + i] = AB_[2] * worksz[(j-1) * amax3 + i] + worksz[(j-1) * amax3 + i + 1];
127 }
128 }
129 for (int j = 2; j <= ang1_ + 2; ++j) {
130 // obtain S(0, j)
131 worksx[j * amax3] = AB_[0] * worksx[(j-1) * amax3] + worksx[(j-1) * amax3 + 1];
132 worksy[j * amax3] = AB_[1] * worksy[(j-1) * amax3] + worksy[(j-1) * amax3 + 1];
133 worksz[j * amax3] = AB_[2] * worksz[(j-1) * amax3] + worksz[(j-1) * amax3 + 1];
134
135 for (int i = 1; i != amax3 - j; ++i) {
136 // obtain S(i, j)
137 worksx[j * amax3 + i] = AB_[0] * worksx[(j-1) * amax3 + i] + worksx[(j-1) * amax3 + i + 1];
138 worksy[j * amax3 + i] = AB_[1] * worksy[(j-1) * amax3 + i] + worksy[(j-1) * amax3 + i + 1];
139 worksz[j * amax3 + i] = AB_[2] * worksz[(j-1) * amax3 + i] + worksz[(j-1) * amax3 + i + 1];
140 }
141 }
142
143 double Sx, Sy, Sz; // operator = 1
144 double ox, oy, oz; // operator = x
145 double dx, dy, dz; // operator = d/dx
146
147 // now we obtain the output
148 int cnt = 0;
149 for (int iz = 0; iz <= ang0_; ++iz) {
150 for (int iy = 0; iy <= ang0_ - iz; ++iy) {
151 const int ix = ang0_ - iy - iz;
152 if (ix >= 0) {
153 for (int jz = 0; jz <= ang1_; ++jz) {
154 for (int jy = 0; jy <= ang1_ - jz; ++jy) {
155 const int jx = ang1_ - jy - jz;
156 if (jx >= 0) {
157
158 const double jxd = jx;
159 const double jyd = jy;
160 const double jzd = jz;
161 double minus1x = 0.0;
162 double minus1y = 0.0;
163 double minus1z = 0.0;
164 if (jx > 0) minus1x = jxd * worksx[ix + amax3 * (jx-1)];
165 if (jy > 0) minus1y = jyd * worksy[iy + amax3 * (jy-1)];
166 if (jz > 0) minus1z = jzd * worksz[iz + amax3 * (jz-1)];
167
168 Sx = worksx[ix + amax3 * jx];
169 Sy = worksy[iy + amax3 * jy];
170 Sz = worksz[iz + amax3 * jz];
171
172 ox = worksx[ix + amax3 * (jx+1)] + Bx * worksx[ix + amax3 * jx];
173 oy = worksy[iy + amax3 * (jy+1)] + By * worksy[iy + amax3 * jy];
174 oz = worksz[iz + amax3 * (jz+1)] + Bz * worksz[iz + amax3 * jz];
175
176 dx = minus1x - 2.0 * cb * worksx[ix+amax3 * (jx+1)];
177 dy = minus1y - 2.0 * cb * worksy[iy+amax3 * (jy+1)];
178 dz = minus1z - 2.0 * cb * worksz[iz+amax3 * (jz+1)];
179
180 current_data[cnt ] = -imag * mcoord_[2] * Sx * dy * Sz;
181 current_data[cnt+size_block_ ] = -imag * mcoord_[0] * Sx * Sy * dz;
182 current_data[cnt+size_block_*2] = -imag * mcoord_[1] * dx * Sy * Sz;
183
184 current_data[cnt ] += imag * mcoord_[1] * Sx * Sy * dz;
185 current_data[cnt+size_block_ ] += imag * mcoord_[2] * dx * Sy * Sz;
186 current_data[cnt+size_block_*2] += imag * mcoord_[0] * Sx * dy * Sz;
187
188 current_data[cnt ] -= imag * Sx * oy * dz;
189 current_data[cnt+size_block_ ] -= imag * dx * Sy * oz;
190 current_data[cnt+size_block_*2] -= imag * ox * dy * Sz;
191
192 current_data[cnt ] += imag * Sx * dy * oz;
193 current_data[cnt+size_block_ ] += imag * ox * Sy * dz;
194 current_data[cnt+size_block_*2] += imag * dx * oy * Sz;
195
196 ++cnt;
197 }
198 }
199 }
200 }
201 }
202 }
203 assert(cnt == asize_intermediate_);
204
205 } // end of prim exponent loop
206
207 stack_->release(worksize * worksize, worksz);
208 stack_->release(worksize * worksize, worksy);
209 stack_->release(worksize * worksize, worksx);
210 }
211
212