1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: _bvrr_drv.h
4 // Copyright (C) 2012 Toru Shiozaki
5 //
6 // Author: Toru Shiozaki <shiozaki@northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program. If not, see <http://www.gnu.org/licenses/>.
23 //
24
25 #ifndef __SRC_INTEGRAL_RYS____BVRR_DRIVER_H
26 #define __SRC_INTEGRAL_RYS____BVRR_DRIVER_H
27
28 #include <numeric>
29 #include <algorithm>
30 #include <array>
31 #include <src/integral/rys/int2d.h>
32 #include <src/integral/rys/scaledata.h>
33
34 namespace bagel {
35
36 template<int a_, int b_, int c_, int d_, int rank_>
bvrr_driver(double * out,const double * const roots,const double * const weights,const double & coeff,const std::array<double,3> & a,const std::array<double,3> & b,const std::array<double,3> & c,const std::array<double,3> & d,const double * const p,const double * const q,const double & xp,const double & xq,const size_t & size_block,const int * const amap,const int * const cmap,const int & asize_,double * const workx,double * const worky,double * const workz,double * const worktx,double * const workty,double * const worktz,double * const worksx,double * const worksy,double * const worksz)37 void bvrr_driver(double* out, const double* const roots, const double* const weights, const double& coeff,
38 const std::array<double,3>& a, const std::array<double,3>& b, const std::array<double,3>& c, const std::array<double,3>& d,
39 const double* const p, const double* const q, const double& xp, const double& xq, const size_t& size_block,
40 const int* const amap, const int* const cmap, const int& asize_, double* const workx, double* const worky, double* const workz,
41 double* const worktx, double* const workty, double* const worktz, double* const worksx, double* const worksy, double* const worksz) {
42
43 constexpr int amin_ = a_;
44 constexpr int cmin_ = c_;
45 constexpr int amax_ = a_+b_;
46 constexpr int cmax_ = c_+d_;
47 constexpr int amax1_ = amax_+1;
48 constexpr int cmax1_ = cmax_+1;
49 constexpr int amax2 = amax_+2;
50 constexpr int cmax2 = cmax_+2;
51 constexpr int isize = amax2 * cmax2;
52 constexpr int worksize = rank_ * isize;
53
54 double iyiz_nn[rank_];
55 double iyiz_tn[rank_];
56 double iyiz_nt[rank_];
57 double iyiz_tt[rank_];
58 double iyiz_sn[rank_];
59 double iyiz_ns[rank_];
60
61 const double oxp2 = 0.5 / xp;
62 const double oxq2 = 0.5 / xq;
63 const double opq = 1.0 / (xp + xq);
64
65 int2d<amax1_,cmax1_,rank_>(p[0], q[0], a[0], b[0], c[0], d[0], xp, xq, oxp2, oxq2, opq, roots, workx);
66 scaledata<rank_, worksize>(workx, weights, coeff*xp*xq*2.0*opq, workx);
67
68 int2d<amax1_,cmax1_,rank_>(p[1], q[1], a[1], b[1], c[1], d[1], xp, xq, oxp2, oxq2, opq, roots, worky);
69 int2d<amax1_,cmax1_,rank_>(p[2], q[2], a[2], b[2], c[2], d[2], xp, xq, oxp2, oxq2, opq, roots, workz);
70
71
72 const double pq[3] = {p[0]-q[0], p[1]-q[1], p[2]-q[2]};
73 const double ac[3] = {a[0]-c[0], a[1]-c[1], a[2]-c[2]};
74
75 // next compute \\tidle{I}_x,y,z up to amax_, cmax_
76 for (int ic = 0; ic <= cmax1_; ++ic)
77 for (int ia = 0; ia <= amax1_; ++ia)
78 for (int i = 0; i != rank_; ++i) {
79 worktx[i+rank_*(ia+amax2*ic)] = pq[0]*workx[i+rank_*(ia+amax2*ic)] + (ia==0 ? 0.0 : ia*oxp2*workx[i+rank_*(ia-1+amax2*ic)]) - (ic==0 ? 0.0 : ic*oxq2*workx[i+rank_*(ia+amax2*(ic-1))]);
80 workty[i+rank_*(ia+amax2*ic)] = pq[1]*worky[i+rank_*(ia+amax2*ic)] + (ia==0 ? 0.0 : ia*oxp2*worky[i+rank_*(ia-1+amax2*ic)]) - (ic==0 ? 0.0 : ic*oxq2*worky[i+rank_*(ia+amax2*(ic-1))]);
81 worktz[i+rank_*(ia+amax2*ic)] = pq[2]*workz[i+rank_*(ia+amax2*ic)] + (ia==0 ? 0.0 : ia*oxp2*workz[i+rank_*(ia-1+amax2*ic)]) - (ic==0 ? 0.0 : ic*oxq2*workz[i+rank_*(ia+amax2*(ic-1))]);
82 }
83 // then compute \\tilde{\\tilde{I_x,y,z up to amax_-1, cmax_-1
84 for (int ic = 0; ic != cmax1_; ++ic)
85 for (int ia = 0; ia != amax1_; ++ia)
86 for (int i = 0; i != rank_; ++i) {
87 worksx[i+rank_*(ia+amax2*ic)] = worktx[i+rank_*((ia+1)+amax2*ic)] - worktx[i+rank_*(ia+amax2*(ic+1))] + ac[0]*worktx[i+rank_*(ia+amax2*ic)];
88 worksy[i+rank_*(ia+amax2*ic)] = workty[i+rank_*((ia+1)+amax2*ic)] - workty[i+rank_*(ia+amax2*(ic+1))] + ac[1]*workty[i+rank_*(ia+amax2*ic)];
89 worksz[i+rank_*(ia+amax2*ic)] = worktz[i+rank_*((ia+1)+amax2*ic)] - worktz[i+rank_*(ia+amax2*(ic+1))] + ac[2]*worktz[i+rank_*(ia+amax2*ic)];
90 }
91
92 double* const dataxx = out;
93 double* const dataxy = dataxx + size_block;
94 double* const dataxz = dataxy + size_block;
95 double* const datayy = dataxz + size_block;
96 double* const datayz = datayy + size_block;
97 double* const datazz = datayz + size_block;
98
99 // assemble up to amax_, cmax_
100 for (int iz = 0; iz <= cmax_; ++iz) {
101 for (int iy = 0; iy <= cmax_ - iz; ++iy) {
102 for (int jz = 0; jz <= amax_; ++jz) {
103 for (int jy = 0; jy <= amax_ - jz; ++jy) {
104 const int offsetz = rank_ * (amax2 * iz + jz);
105 const int offsety = rank_ * (amax2 * iy + jy);
106 const int iyz = cmax1_ * (iy + cmax1_ * iz);
107 const int jyz = amax1_ * (jy + amax1_ * jz);
108 for (int i = 0; i != rank_; ++i) {
109 iyiz_nn[i] = worky [offsety + i] * workz [offsetz + i];
110 iyiz_tn[i] = workty[offsety + i] * workz [offsetz + i] * (1.0-roots[i]);
111 iyiz_nt[i] = worky [offsety + i] * worktz[offsetz + i] * (1.0-roots[i]);
112 iyiz_tt[i] = workty[offsety + i] * worktz[offsetz + i] * (1.0-roots[i]);
113 iyiz_sn[i] = worksy[offsety + i] * workz [offsetz + i];
114 iyiz_ns[i] = worky [offsety + i] * worksz[offsetz + i];
115 }
116 for (int ix = std::max(0, cmin_ - iy - iz); ix <= cmax_ - iy - iz; ++ix) {
117 const int iposition = cmap[ix + iyz];
118 const int ipos_asize = iposition * asize_;
119 for (int jx = std::max(0, amin_ - jy - jz); jx <= amax_ - jy - jz; ++jx) {
120 const int offsetx = rank_ * (amax2 * ix + jx);
121 const int jposition = amap[jx + jyz];
122 const int ijposition = jposition + ipos_asize;
123 dataxx[ijposition] = blas::dot_product_noconj(iyiz_nn, rank_, worksx+offsetx);
124 dataxy[ijposition] = blas::dot_product_noconj(iyiz_tn, rank_, worktx+offsetx);
125 dataxz[ijposition] = blas::dot_product_noconj(iyiz_nt, rank_, worktx+offsetx);
126 datayy[ijposition] = blas::dot_product_noconj(iyiz_sn, rank_, workx +offsetx);
127 datayz[ijposition] = blas::dot_product_noconj(iyiz_tt, rank_, workx +offsetx);
128 datazz[ijposition] = blas::dot_product_noconj(iyiz_ns, rank_, workx +offsetx);
129 }
130 }
131 }
132 }
133 }
134 }
135
136 }
137
138 }
139
140 #endif
141