1 //
2 // BAGEL - Brilliantly Advanced General Electronic Structure Library
3 // Filename: _bvrr_drv.h
4 // Copyright (C) 2012 Toru Shiozaki
5 //
6 // Author: Toru Shiozaki <shiozaki@northwestern.edu>
7 // Maintainer: Shiozaki group
8 //
9 // This file is part of the BAGEL package.
10 //
11 // This program is free software: you can redistribute it and/or modify
12 // it under the terms of the GNU General Public License as published by
13 // the Free Software Foundation, either version 3 of the License, or
14 // (at your option) any later version.
15 //
16 // This program is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 // GNU General Public License for more details.
20 //
21 // You should have received a copy of the GNU General Public License
22 // along with this program.  If not, see <http://www.gnu.org/licenses/>.
23 //
24 
25 #ifndef __SRC_INTEGRAL_RYS____BVRR_DRIVER_H
26 #define __SRC_INTEGRAL_RYS____BVRR_DRIVER_H
27 
28 #include <numeric>
29 #include <algorithm>
30 #include <array>
31 #include <src/integral/rys/int2d.h>
32 #include <src/integral/rys/scaledata.h>
33 
34 namespace bagel {
35 
36 template<int a_, int b_, int c_, int d_, int rank_>
bvrr_driver(double * out,const double * const roots,const double * const weights,const double & coeff,const std::array<double,3> & a,const std::array<double,3> & b,const std::array<double,3> & c,const std::array<double,3> & d,const double * const p,const double * const q,const double & xp,const double & xq,const size_t & size_block,const int * const amap,const int * const cmap,const int & asize_,double * const workx,double * const worky,double * const workz,double * const worktx,double * const workty,double * const worktz,double * const worksx,double * const worksy,double * const worksz)37 void bvrr_driver(double* out, const double* const roots, const double* const weights, const double& coeff,
38                 const std::array<double,3>& a, const std::array<double,3>& b, const std::array<double,3>& c, const std::array<double,3>& d,
39                 const double* const p, const double* const q, const double& xp, const double& xq, const size_t& size_block,
40                 const int* const amap, const int* const cmap, const int& asize_, double* const workx, double* const worky, double* const workz,
41                 double* const worktx, double* const workty, double* const worktz, double* const worksx, double* const worksy, double* const worksz) {
42 
43   constexpr int amin_ = a_;
44   constexpr int cmin_ = c_;
45   constexpr int amax_ = a_+b_;
46   constexpr int cmax_ = c_+d_;
47   constexpr int amax1_ = amax_+1;
48   constexpr int cmax1_ = cmax_+1;
49   constexpr int amax2 = amax_+2;
50   constexpr int cmax2 = cmax_+2;
51   constexpr int isize = amax2 * cmax2;
52   constexpr int worksize = rank_ * isize;
53 
54   double iyiz_nn[rank_];
55   double iyiz_tn[rank_];
56   double iyiz_nt[rank_];
57   double iyiz_tt[rank_];
58   double iyiz_sn[rank_];
59   double iyiz_ns[rank_];
60 
61   const double oxp2 = 0.5 / xp;
62   const double oxq2 = 0.5 / xq;
63   const double opq = 1.0 / (xp + xq);
64 
65   int2d<amax1_,cmax1_,rank_>(p[0], q[0], a[0], b[0], c[0], d[0], xp, xq, oxp2, oxq2, opq, roots, workx);
66   scaledata<rank_, worksize>(workx, weights, coeff*xp*xq*2.0*opq, workx);
67 
68   int2d<amax1_,cmax1_,rank_>(p[1], q[1], a[1], b[1], c[1], d[1], xp, xq, oxp2, oxq2, opq, roots, worky);
69   int2d<amax1_,cmax1_,rank_>(p[2], q[2], a[2], b[2], c[2], d[2], xp, xq, oxp2, oxq2, opq, roots, workz);
70 
71 
72   const double pq[3] = {p[0]-q[0], p[1]-q[1], p[2]-q[2]};
73   const double ac[3] = {a[0]-c[0], a[1]-c[1], a[2]-c[2]};
74 
75   // next compute \\tidle{I}_x,y,z up to amax_, cmax_
76   for (int ic = 0; ic <= cmax1_; ++ic)
77     for (int ia = 0; ia <= amax1_; ++ia)
78       for (int i = 0; i != rank_; ++i) {
79         worktx[i+rank_*(ia+amax2*ic)] = pq[0]*workx[i+rank_*(ia+amax2*ic)] + (ia==0 ? 0.0 : ia*oxp2*workx[i+rank_*(ia-1+amax2*ic)]) - (ic==0 ? 0.0 : ic*oxq2*workx[i+rank_*(ia+amax2*(ic-1))]);
80         workty[i+rank_*(ia+amax2*ic)] = pq[1]*worky[i+rank_*(ia+amax2*ic)] + (ia==0 ? 0.0 : ia*oxp2*worky[i+rank_*(ia-1+amax2*ic)]) - (ic==0 ? 0.0 : ic*oxq2*worky[i+rank_*(ia+amax2*(ic-1))]);
81         worktz[i+rank_*(ia+amax2*ic)] = pq[2]*workz[i+rank_*(ia+amax2*ic)] + (ia==0 ? 0.0 : ia*oxp2*workz[i+rank_*(ia-1+amax2*ic)]) - (ic==0 ? 0.0 : ic*oxq2*workz[i+rank_*(ia+amax2*(ic-1))]);
82       }
83   // then compute \\tilde{\\tilde{I_x,y,z up to amax_-1, cmax_-1
84   for (int ic = 0; ic != cmax1_; ++ic)
85     for (int ia = 0; ia != amax1_; ++ia)
86       for (int i = 0; i != rank_; ++i) {
87         worksx[i+rank_*(ia+amax2*ic)] = worktx[i+rank_*((ia+1)+amax2*ic)] - worktx[i+rank_*(ia+amax2*(ic+1))] + ac[0]*worktx[i+rank_*(ia+amax2*ic)];
88         worksy[i+rank_*(ia+amax2*ic)] = workty[i+rank_*((ia+1)+amax2*ic)] - workty[i+rank_*(ia+amax2*(ic+1))] + ac[1]*workty[i+rank_*(ia+amax2*ic)];
89         worksz[i+rank_*(ia+amax2*ic)] = worktz[i+rank_*((ia+1)+amax2*ic)] - worktz[i+rank_*(ia+amax2*(ic+1))] + ac[2]*worktz[i+rank_*(ia+amax2*ic)];
90       }
91 
92   double* const dataxx = out;
93   double* const dataxy = dataxx + size_block;
94   double* const dataxz = dataxy + size_block;
95   double* const datayy = dataxz + size_block;
96   double* const datayz = datayy + size_block;
97   double* const datazz = datayz + size_block;
98 
99   // assemble up to amax_, cmax_
100   for (int iz = 0; iz <= cmax_; ++iz) {
101     for (int iy = 0; iy <= cmax_ - iz; ++iy) {
102       for (int jz = 0; jz <= amax_; ++jz) {
103         for (int jy = 0; jy <= amax_ - jz; ++jy) {
104           const int offsetz = rank_ * (amax2 * iz + jz);
105           const int offsety = rank_ * (amax2 * iy + jy);
106           const int iyz = cmax1_ * (iy + cmax1_ * iz);
107           const int jyz = amax1_ * (jy + amax1_ * jz);
108           for (int i = 0; i != rank_; ++i) {
109             iyiz_nn[i] = worky [offsety + i] * workz [offsetz + i];
110             iyiz_tn[i] = workty[offsety + i] * workz [offsetz + i] * (1.0-roots[i]);
111             iyiz_nt[i] = worky [offsety + i] * worktz[offsetz + i] * (1.0-roots[i]);
112             iyiz_tt[i] = workty[offsety + i] * worktz[offsetz + i] * (1.0-roots[i]);
113             iyiz_sn[i] = worksy[offsety + i] * workz [offsetz + i];
114             iyiz_ns[i] = worky [offsety + i] * worksz[offsetz + i];
115           }
116           for (int ix = std::max(0, cmin_ - iy - iz); ix <= cmax_ - iy - iz; ++ix) {
117             const int iposition = cmap[ix + iyz];
118             const int ipos_asize = iposition * asize_;
119             for (int jx = std::max(0, amin_ - jy - jz); jx <= amax_ - jy - jz; ++jx) {
120               const int offsetx = rank_ * (amax2 * ix + jx);
121               const int jposition = amap[jx + jyz];
122               const int ijposition = jposition + ipos_asize;
123               dataxx[ijposition] = blas::dot_product_noconj(iyiz_nn, rank_, worksx+offsetx);
124               dataxy[ijposition] = blas::dot_product_noconj(iyiz_tn, rank_, worktx+offsetx);
125               dataxz[ijposition] = blas::dot_product_noconj(iyiz_nt, rank_, worktx+offsetx);
126               datayy[ijposition] = blas::dot_product_noconj(iyiz_sn, rank_, workx +offsetx);
127               datayz[ijposition] = blas::dot_product_noconj(iyiz_tt, rank_, workx +offsetx);
128               datazz[ijposition] = blas::dot_product_noconj(iyiz_ns, rank_, workx +offsetx);
129             }
130           }
131         }
132       }
133     }
134   }
135 
136 }
137 
138 }
139 
140 #endif
141