1 #define _XOPEN_SOURCE 500
2 #define _XOPEN_SOURCE_EXTENDED 1
3
4 #include <assert.h>
5 #include <math.h>
6 #include <stdio.h>
7 #include <stdlib.h>
8 #include <string.h>
9 #include <time.h>
10 #include <sys/times.h>
11 #include <unistd.h>
12
13 #define __CVERSION__
14 #include "general.h"
15 #include "integrator.h"
16 #include "functionals.h"
17
18 #include "inforb.h"
19
20
21 void FSYM(deq27)(const real* cmo, const real* ubo, const real* dv,
22 real* dxcao, real* dxvao, real* wrk, integer* lfrsav);
23 integer FSYM(isetksymop)(const integer *new_ksymop);
24
25 typedef struct {
26 real *dmata, *dmatb; /* Denisty matrices */
27 real *kappaY, *kappaZ; /* Perturbation vectors */
28 integer symY, symZ; /* Symmetry of perturbations */
29 integer spinY, spinZ; /* Spin rank of perturbations */
30 integer symYZ; /* Symmetry of mixed perturbation */
31 real *rhoa_y, *rhob_y; /* First order perturbed density (AO) */
32 real *rhoa_z, *rhob_z; /* First order perturbed density (AO) */
33 real *rhoa_yzzy; /* Second order perturbed density (AO) */
34 real *rhob_yzzy; /* Second order perturbed density (AO) */
35 real *rhowa_y, *rhowb_y; /* First order perturbed density (grid) */
36 real *rhowa_z, *rhowb_z; /* First order perturbed density (grid) */
37 real *rhowa_yzzy; /* Second order perturbed density (grid) */
38 real *rhowb_yzzy; /* Second order perturbed density (grid) */
39 real *res_omega_a; /* Omega dependent part of qr contr. */
40 real *res_omega_b; /* Omega dependent part of qr contr. */
41 real *res_omY_a; /* [Omega,Y] dependent part of qr contr. */
42 real *res_omY_b; /* [Omega,Y] dependent part of qr contr. */
43 real *res_omZ_a; /* [Omega,Z] dependent part of qr contr. */
44 real *res_omZ_b; /* [Omega,Z] dependent part of qr contr. */
45 real *vya; /* VXC contribution at the grid points */
46 real *vyb; /* VXC contribution at the grid points */
47 real *vza; /* VXC contribution at the grid points */
48 real *vzb; /* VXC contribution at the grid points */
49 real *tmpa; /* temporary variable for integration */
50 real *tmpb; /* temporary variable for integration */
51 real *pref3a; /* precomputed prefactors for grip points*/
52 real *pref3b; /* precomputed prefactors for grip points*/
53 real *prefb2a; /* precomputed prefactors for grip points*/
54 real *prefb2b; /* precomputed prefactors for grip points*/
55 real *prefc2a; /* precomputed prefactors for grip points*/
56 real *prefc2b; /* precomputed prefactors for grip points*/
57 } Quad_Open_Data;
58
59 static void
quad_open_lda_cb(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,Quad_Open_Data * data)60 quad_open_lda_cb(DftIntegratorBl* grid, real * RESTRICT tmp,
61 integer bllen, integer blstart, integer blend,
62 Quad_Open_Data* data)
63 {
64 static const real MONER = -1.0;
65
66 integer i, j, k, isym, ibl, jbl;
67 real * RESTRICT aos = grid->atv;
68 real *om_a = data->res_omega_a;
69 real *om_b = data->res_omega_b;
70 real *omYa = data->res_omY_a;
71 real *omYb = data->res_omY_b;
72 real *omZa = data->res_omZ_a;
73 real *omZb = data->res_omZ_b;
74 real *tmpa = data->tmpa;
75 real *tmpb = data->tmpb;
76 real *pref3a = data->pref3a;
77 real *pref3b = data->pref3b;
78 real *prefb2a = data->prefb2a;
79 real *prefb2b = data->prefb2b;
80 real *prefc2a = data->prefc2a;
81 real *prefc2b = data->prefc2b;
82 FunDensProp dp = { 0 };
83 FunThirdFuncDrv vxc;
84 integer mat_size = DFT_BLLEN;
85
86 /* compute vector of transformed densities */
87 FSYM2(getexp_blocked_lda)(&data->symY, data->rhoa_y, grid->atv,
88 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
89 tmp, &bllen, data->rhowa_y);
90 FSYM2(getexp_blocked_lda)(&data->symY, data->rhob_y, grid->atv,
91 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
92 tmp, &bllen, data->rhowb_y);
93 FSYM2(getexp_blocked_lda)(&data->symZ, data->rhoa_z, grid->atv,
94 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
95 tmp, &bllen, data->rhowa_z);
96 FSYM2(getexp_blocked_lda)(&data->symZ, data->rhob_z, grid->atv,
97 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
98 tmp, &bllen, data->rhowb_z);
99 FSYM2(getexp_blocked_lda)(&data->symYZ, data->rhoa_yzzy, grid->atv,
100 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
101 tmp, &bllen, data->rhowa_yzzy);
102 FSYM2(getexp_blocked_lda)(&data->symYZ, data->rhob_yzzy, grid->atv,
103 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
104 tmp, &bllen, data->rhowb_yzzy);
105 /* set triplet perturbations signs */
106 if (data->spinY==1) {
107 FSYM(dscal)(&mat_size,&MONER,data->rhowb_y,&ONEI);
108 FSYM(dscal)(&mat_size,&MONER,data->rhowb_yzzy,&ONEI);
109 }
110 if (data->spinZ==1) {
111 FSYM(dscal)(&mat_size,&MONER,data->rhowb_z,&ONEI);
112 FSYM(dscal)(&mat_size,&MONER,data->rhowb_yzzy,&ONEI);
113 }
114 /* compute derrivatives over grid points */
115 for(i=blstart; i<blend; i++) {
116 real weight = grid->weight[grid->curr_point+i];
117 dp.rhoa = grid->r.ho.a[i];
118 dp.rhob = grid->r.ho.b[i];
119 drv3_clear(&vxc);
120 selected_func->third(&vxc,weight,&dp);
121 pref3a[i] = vxc.df3000*data->rhowa_y[i]*data->rhowa_z[i]+
122 vxc.df2100*data->rhowb_y[i]*data->rhowa_z[i]+
123 vxc.df2100*data->rhowa_y[i]*data->rhowb_z[i]+
124 vxc.df1200*data->rhowb_y[i]*data->rhowb_z[i]+
125 vxc.df2000*data->rhowa_yzzy[i]+
126 vxc.df1100*data->rhowb_yzzy[i];
127 pref3b[i] = vxc.df0300*data->rhowb_y[i]*data->rhowb_z[i]+
128 vxc.df1200*data->rhowb_y[i]*data->rhowa_z[i]+
129 vxc.df1200*data->rhowa_y[i]*data->rhowb_z[i]+
130 vxc.df2100*data->rhowa_y[i]*data->rhowa_z[i]+
131 vxc.df0200*data->rhowb_yzzy[i]+
132 vxc.df1100*data->rhowa_yzzy[i];
133 pref3a[i] *= 0.5; pref3b[i] *= 0.5;
134 prefb2a[i] = vxc.df2000*data->rhowa_y[i]+vxc.df1100*data->rhowb_y[i];
135 prefb2b[i] = vxc.df0200*data->rhowb_y[i]+vxc.df1100*data->rhowa_y[i];
136 prefb2a[i] *= 0.5; prefb2b[i] *= 0.5;
137 prefc2a[i] = vxc.df2000*data->rhowa_z[i]+vxc.df1100*data->rhowb_z[i];
138 prefc2b[i] = vxc.df0200*data->rhowb_z[i]+vxc.df1100*data->rhowa_z[i];
139 prefc2a[i] *= 0.5; prefc2b[i] *= 0.5;
140 }
141
142 /* main loop over symmetries */
143 for(isym=0; isym<grid->nsym; isym++) {
144 integer (*RESTRICT iblocks)[2] = BASBLOCK(grid,isym);
145 integer ibl_cnt = grid->bas_bl_cnt[isym];
146 for(ibl=0; ibl<ibl_cnt; ibl++)
147 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
148 integer ioff = i*bllen;
149 for(k=blstart; k<blend; k++) {
150 tmpa[k+ioff] = pref3a[k]*aos[k+ioff];
151 tmpb[k+ioff] = pref3b[k]*aos[k+ioff];
152 data->vza[k+ioff] = -prefb2a[k]*aos[k+ioff];
153 data->vzb[k+ioff] = -prefb2b[k]*aos[k+ioff];
154 data->vya[k+ioff] = -prefc2a[k]*aos[k+ioff];
155 data->vyb[k+ioff] = -prefc2b[k]*aos[k+ioff];
156 }
157 }
158 /* Compute contributions to om, omY and omZ */
159 for(ibl=0; ibl<ibl_cnt; ibl++) {
160 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
161 integer jsym, jbl_cnt, ioff = i*inforb_.nbast;
162 real * RESTRICT tmpai = tmpa + i*bllen;
163 real * RESTRICT tmpbi = tmpb + i*bllen;
164 real * RESTRICT vyai = data->vya + i*bllen;
165 real * RESTRICT vybi = data->vyb + i*bllen;
166 real * RESTRICT vzai = data->vza + i*bllen;
167 real * RESTRICT vzbi = data->vzb + i*bllen;
168 integer (*RESTRICT jblocks)[2];
169
170 jsym = inforb_.muld2h[data->symYZ-1][isym]-1;
171 jblocks = BASBLOCK(grid,jsym);
172 jbl_cnt = grid->bas_bl_cnt[jsym];
173 for(jbl=0; jbl<jbl_cnt; jbl++) {
174 for(j=jblocks[jbl][0]-1; j<jblocks[jbl][1]; j++) {
175 real * RESTRICT aosj = aos + j*bllen;
176 real sa = 0;
177 real sb = 0;
178 for(k=blstart; k<blend; k++) {
179 sa += aosj[k]*tmpai[k];
180 sb += aosj[k]*tmpbi[k];
181 }
182 om_a[j+ioff] += sa;
183 om_b[j+ioff] += sb;
184 }
185 }
186
187 jsym = inforb_.muld2h[data->symZ-1][isym]-1;
188 jblocks = BASBLOCK(grid,jsym);
189 jbl_cnt = grid->bas_bl_cnt[jsym];
190 for(jbl=0; jbl<jbl_cnt; jbl++) {
191 for(j=jblocks[jbl][0]-1; j<jblocks[jbl][1]; j++) {
192 real * RESTRICT aosj = aos + j*bllen;
193 real sa = 0;
194 real sb = 0;
195 for(k=blstart; k<blend; k++) {
196 sa += aosj[k]*vyai[k];
197 sb += aosj[k]*vybi[k];
198 }
199 omYa[j+ioff] += sa;
200 omYb[j+ioff] += sb;
201 }
202 }
203
204 jsym = inforb_.muld2h[data->symY-1][isym]-1;
205 jblocks = BASBLOCK(grid,jsym);
206 jbl_cnt = grid->bas_bl_cnt[jsym];
207 for(jbl=0; jbl<jbl_cnt; jbl++) {
208 for(j=jblocks[jbl][0]-1; j<jblocks[jbl][1]; j++) {
209 real * RESTRICT aosj = aos + j*bllen;
210 real sa = 0;
211 real sb = 0;
212 for(k=blstart; k<blend; k++) {
213 sa += aosj[k]*vzai[k];
214 sb += aosj[k]*vzbi[k];
215 }
216 omZa[j+ioff] += sa;
217 omZb[j+ioff] += sb;
218 }
219 }
220 } /* Start next column */
221 }/* one batch of blocks */
222 } /* Loop over symmetries */
223 }
224
225
226 static void
quad_open_gga_cb(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,Quad_Open_Data * data)227 quad_open_gga_cb(DftIntegratorBl* grid, real * RESTRICT tmp,
228 integer bllen, integer blstart, integer blend,
229 Quad_Open_Data* data)
230 {
231
232 static const real MONER = -1.0;
233
234 integer i,j, k, ibl, jbl, isym;
235 real * RESTRICT aos = grid->atv;
236 real * RESTRICT aox = grid->atv+bllen*inforb_.nbast;
237 real * RESTRICT aoy = grid->atv+bllen*inforb_.nbast*2;
238 real * RESTRICT aoz = grid->atv+bllen*inforb_.nbast*3;
239 real *om_a = data->res_omega_a;
240 real *om_b = data->res_omega_b;
241 real *omYa = data->res_omY_a;
242 real *omYb = data->res_omY_b;
243 real *omZa = data->res_omZ_a;
244 real *omZb = data->res_omZ_b;
245 real *vya = data->vya;
246 real *vyb = data->vyb;
247 real *vza = data->vza;
248 real *vzb = data->vzb;
249 real *tmpa = data->tmpa;
250 real *tmpb = data->tmpb;
251 real (*pref3a)[4] = (real (*)[4])data->pref3a;
252 real (*pref3b)[4] = (real (*)[4])data->pref3b;
253 real (*prefb2a)[4] = (real (*)[4])data->prefb2a;
254 real (*prefb2b)[4] = (real (*)[4])data->prefb2b;
255 real (*prefc2a)[4] = (real (*)[4])data->prefc2a;
256 real (*prefc2b)[4] = (real (*)[4])data->prefc2b;
257 FunDensProp dp = { 0 };
258 FunThirdFuncDrv vxc;
259 integer mat_size = 4*DFT_BLLEN;
260 real zetaYa, zetaYb, zetaYab;
261 real zetaZa, zetaZb, zetaZab;
262 real zetaYZZYa, zetaYZZYb, zetaYZZYab;
263 real trYZaa, trYZbb, trYZab;
264 real fza2f, fzb2f, fg2f;
265 real fza1fy, fzb1fy, fg1fy;
266 real fza1fz, fzb1fz, fg1fz;
267 real fza0f, fzb0f, fg0f;
268 real fa1y, fb1y, fg1y;
269 real fa1z, fb1z, fg1z;
270
271 /* compute vectors of transformed densities and their gradients */
272 FSYM2(getexp_blocked_gga)(&data->symY, data->rhoa_y, grid->atv,
273 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
274 tmp, &bllen, (double(*)[4])data->rhowa_y);
275 FSYM2(getexp_blocked_gga)(&data->symY, data->rhob_y, grid->atv,
276 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
277 tmp, &bllen, (double(*)[4])data->rhowb_y);
278 FSYM2(getexp_blocked_gga)(&data->symZ, data->rhoa_z, grid->atv,
279 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
280 tmp, &bllen, (double(*)[4])data->rhowa_z);
281 FSYM2(getexp_blocked_gga)(&data->symZ, data->rhob_z, grid->atv,
282 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
283 tmp, &bllen, (double(*)[4])data->rhowb_z);
284 FSYM2(getexp_blocked_gga)(&data->symYZ, data->rhoa_yzzy, grid->atv,
285 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
286 tmp, &bllen, (double(*)[4])data->rhowa_yzzy);
287 FSYM2(getexp_blocked_gga)(&data->symYZ, data->rhob_yzzy, grid->atv,
288 grid->bas_bl_cnt, grid->basblocks, &grid->shl_bl_cnt,
289 tmp, &bllen, (double(*)[4])data->rhowb_yzzy);
290 /* set triplet perturbations signs */
291 if (data->spinY==1) {
292 FSYM(dscal)(&mat_size,&MONER,data->rhowb_y,&ONEI);
293 FSYM(dscal)(&mat_size,&MONER,data->rhowb_yzzy,&ONEI);
294 }
295 if (data->spinZ==1) {
296 FSYM(dscal)(&mat_size,&MONER,data->rhowb_z,&ONEI);
297 FSYM(dscal)(&mat_size,&MONER,data->rhowb_yzzy,&ONEI);
298 }
299 real (*trYa)[4] = (real (*)[4])data->rhowa_y;
300 real (*trYb)[4] = (real (*)[4])data->rhowb_y;
301 real (*trZa)[4] = (real (*)[4])data->rhowa_z;
302 real (*trZb)[4] = (real (*)[4])data->rhowb_z;
303 real (*trYZZYa)[4] = (real (*)[4])data->rhowa_yzzy;
304 real (*trYZZYb)[4] = (real (*)[4])data->rhowb_yzzy;
305
306 for(i=blstart; i<blend; i++) {
307 real weight = grid->weight[grid->curr_point+i];
308 dp.rhoa = grid->r.ho.a[i];
309 dp.rhob = grid->r.ho.b[i];
310 dp.grada = sqrt(grid->g.rad.a [i][0]*grid->g.rad.a [i][0]+
311 grid->g.rad.a [i][1]*grid->g.rad.a [i][1]+
312 grid->g.rad.a [i][2]*grid->g.rad.a [i][2]);
313 dp.gradb = sqrt(grid->g.rad.b [i][0]*grid->g.rad.b [i][0]+
314 grid->g.rad.b [i][1]*grid->g.rad.b [i][1]+
315 grid->g.rad.b [i][2]*grid->g.rad.b [i][2]);
316 dp.gradab = grid->g.rad.a [i][0]*grid->g.rad.b [i][0]+
317 grid->g.rad.a [i][1]*grid->g.rad.b [i][1]+
318 grid->g.rad.a [i][2]*grid->g.rad.b [i][2];
319 if(dp.grada<1e-20) dp.grada = 1e-20;
320 if(dp.gradb<1e-20) dp.gradb = 1e-20;
321 /* Clear derrivatives and compute them for current point*/
322 drv3_clear(&vxc);
323 selected_func->third(&vxc,weight,&dp);
324 /* First order variations of denisty gradient */
325 zetaYa = trYa[i][1]*grid->g.rad.a[i][0]+trYa[i][2]*grid->g.rad.a[i][1]+
326 trYa[i][3]*grid->g.rad.a[i][2];
327 zetaYb = trYb[i][1]*grid->g.rad.b[i][0]+trYb[i][2]*grid->g.rad.b[i][1]+
328 trYb[i][3]*grid->g.rad.b[i][2];
329 zetaZa = trZa[i][1]*grid->g.rad.a[i][0]+trZa[i][2]*grid->g.rad.a[i][1]+
330 trZa[i][3]*grid->g.rad.a[i][2];
331 zetaZb = trZb[i][1]*grid->g.rad.b[i][0]+trZb[i][2]*grid->g.rad.b[i][1]+
332 trZb[i][3]*grid->g.rad.b[i][2];
333 zetaYa = zetaYa/dp.grada; zetaYb = zetaYb/dp.gradb;
334 zetaZa = zetaZa/dp.grada; zetaZb = zetaZb/dp.gradb;
335 zetaYab = trYa[i][1]*grid->g.rad.b[i][0]+trYa[i][2]*grid->g.rad.b[i][1]+
336 trYa[i][3]*grid->g.rad.b[i][2]+trYb[i][1]*grid->g.rad.a[i][0]+
337 trYb[i][2]*grid->g.rad.a[i][1]+trYb[i][3]*grid->g.rad.a[i][2];
338 zetaZab = trZa[i][1]*grid->g.rad.b[i][0]+trZa[i][2]*grid->g.rad.b[i][1]+
339 trZa[i][3]*grid->g.rad.b[i][2]+trZb[i][1]*grid->g.rad.a[i][0]+
340 trZb[i][2]*grid->g.rad.a[i][1]+trZb[i][3]*grid->g.rad.a[i][2];
341 /* Second order variations of density gradient */
342 zetaYZZYa = trYZZYa[i][1]*grid->g.rad.a[i][0]+
343 trYZZYa[i][2]*grid->g.rad.a[i][1]+
344 trYZZYa[i][3]*grid->g.rad.a[i][2];
345 zetaYZZYb = trYZZYb[i][1]*grid->g.rad.b[i][0]+
346 trYZZYb[i][2]*grid->g.rad.b[i][1]+
347 trYZZYb[i][3]*grid->g.rad.b[i][2];
348 zetaYZZYa = zetaYZZYa/dp.grada;
349 zetaYZZYb = zetaYZZYb/dp.gradb;
350 zetaYZZYab = trYZZYa[i][1]*grid->g.rad.b[i][0]+trYZZYa[i][2]*grid->g.rad.b[i][1]+
351 trYZZYa[i][3]*grid->g.rad.b[i][2]+trYZZYb[i][1]*grid->g.rad.a[i][0]+
352 trYZZYb[i][2]*grid->g.rad.a[i][1]+trYZZYb[i][3]*grid->g.rad.a[i][2];
353 trYZaa = trYa[i][1]*trZa[i][1]+trYa[i][2]*trZa[i][2]+trYa[i][3]*trZa[i][3];
354 trYZbb = trYb[i][1]*trZb[i][1]+trYb[i][2]*trZb[i][2]+trYb[i][3]*trZb[i][3];
355 trYZab = trYa[i][1]*trZb[i][1]+trYa[i][2]*trZb[i][2]+trYa[i][3]*trZb[i][3]+
356 trYb[i][1]*trZa[i][1]+trYb[i][2]*trZa[i][2]+trYb[i][3]*trZa[i][3];
357 /*****************************************************************/
358 /*** Density operator times 2-th order variation of functional ***/
359 /*****************************************************************/
360 /* FRRR part: */
361 pref3a[i][0] = vxc.df3000*trYa[i][0]*trZa[i][0]+vxc.df2100*trYb[i][0]*trZa[i][0]+
362 vxc.df2100*trYa[i][0]*trZb[i][0]+vxc.df1200*trYb[i][0]*trZb[i][0];
363 pref3b[i][0] = vxc.df0300*trYb[i][0]*trZb[i][0]+vxc.df1200*trYb[i][0]*trZa[i][0]+
364 vxc.df1200*trYa[i][0]*trZb[i][0]+vxc.df2100*trYa[i][0]*trZa[i][0];
365 /* FRRZ part: */
366 pref3a[i][0] += vxc.df2010*trYa[i][0]*zetaZa+vxc.df1110*trYb[i][0]*zetaZa+
367 vxc.df2001*trYa[i][0]*zetaZb+vxc.df1101*trYb[i][0]*zetaZb+
368 vxc.df2010*trZa[i][0]*zetaYa+vxc.df1110*trZb[i][0]*zetaYa+
369 vxc.df2001*trZa[i][0]*zetaYb+vxc.df1101*trZb[i][0]*zetaYb;
370 pref3b[i][0] += vxc.df0201*trYb[i][0]*zetaZb+vxc.df1101*trYa[i][0]*zetaZb+
371 vxc.df0210*trYb[i][0]*zetaZa+vxc.df1101*trYa[i][0]*zetaZa+
372 vxc.df0201*trZb[i][0]*zetaYb+vxc.df1101*trZa[i][0]*zetaYb+
373 vxc.df0210*trZb[i][0]*zetaYa+vxc.df1101*trZa[i][0]*zetaYa;
374 /* FRRG part: */
375 pref3a[i][0] += vxc.df20001*trYa[i][0]*zetaZab+vxc.df11001*trYb[i][0]*zetaZab+
376 vxc.df20001*trZa[i][0]*zetaYab+vxc.df11001*trZb[i][0]*zetaYab;
377 pref3b[i][0] += vxc.df02001*trYb[i][0]*zetaZab+vxc.df11001*trYa[i][0]*zetaZab+
378 vxc.df02001*trZb[i][0]*zetaYab+vxc.df11001*trZa[i][0]*zetaYab;
379 /* FRZZ part: */
380 pref3a[i][0] += vxc.df1020*zetaYa*zetaZa+vxc.df1011*zetaYa*zetaZb+
381 vxc.df1011*zetaYb*zetaZa+vxc.df1002*zetaYb*zetaZb;
382 pref3b[i][0] += vxc.df0102*zetaYb*zetaZb+vxc.df0111*zetaYb*zetaZa+
383 vxc.df0111*zetaYa*zetaZb+vxc.df0120*zetaYa*zetaZa;
384 /* FRZG part: not implemented in Dalton */
385 /* FRGG part: not implemented in Dalton */
386 /* FRR part: */
387 pref3a[i][0] += vxc.df2000*trYZZYa[i][0]+vxc.df1100*trYZZYb[i][0];
388 pref3b[i][0] += vxc.df0200*trYZZYb[i][0]+vxc.df1100*trYZZYa[i][0];
389 /* FRZ part: */
390 pref3a[i][0] += vxc.df1010*zetaYZZYa+vxc.df1001*zetaYZZYb;
391 pref3b[i][0] += vxc.df0101*zetaYZZYb+vxc.df0110*zetaYZZYa;
392 pref3a[i][0] += vxc.df1010*trYZaa/dp.grada+vxc.df1001*trYZbb/dp.gradb;
393 pref3b[i][0] += vxc.df0101*trYZbb/dp.gradb+vxc.df0110*trYZaa/dp.grada;
394 pref3a[i][0] += -(vxc.df1010*zetaYa*zetaZa/dp.grada+vxc.df1001*zetaYb*zetaZb/dp.gradb);
395 pref3b[i][0] += -(vxc.df0110*zetaYa*zetaZa/dp.grada+vxc.df0101*zetaYb*zetaZb/dp.gradb);
396 /* FRG part: */
397 pref3a[i][0] += vxc.df10001*zetaYZZYab+vxc.df10001*trYZab;
398 pref3b[i][0] += vxc.df01001*zetaYZZYab+vxc.df01001*trYZab;
399 pref3a[i][0] *= 0.5; pref3b[i][0] *= 0.5;
400 /***************************************************************************/
401 /*** Density gradient operator times 2-th order variation of functional ****/
402 /***************************************************************************/
403 /* FZZZ part: */
404 fza2f = vxc.df0030*zetaYa*zetaZa+vxc.df0021*zetaYa*zetaZb+
405 vxc.df0021*zetaYb*zetaZa+vxc.df0012*zetaYb*zetaZb;
406 fzb2f = vxc.df0003*zetaYb*zetaZb+vxc.df0012*zetaYb*zetaZa+
407 vxc.df0012*zetaYa*zetaZb+vxc.df0021*zetaYa*zetaZa;
408 /* FZZR part: */
409 fza2f += vxc.df1020*trYa[i][0]*zetaZa+vxc.df1011*trYa[i][0]*zetaZb+
410 vxc.df0120*trYb[i][0]*zetaZa+vxc.df0111*trYb[i][0]*zetaZb+
411 vxc.df1020*trZa[i][0]*zetaYa+vxc.df1011*trZa[i][0]*zetaYb+
412 vxc.df0120*trZb[i][0]*zetaYa+vxc.df0111*trZb[i][0]*zetaYa;
413 fzb2f += vxc.df0102*trYb[i][0]*zetaZb+vxc.df0111*trYb[i][0]*zetaZa+
414 vxc.df1002*trYa[i][0]*zetaZb+vxc.df1011*trYa[i][0]*zetaZa+
415 vxc.df0102*trZb[i][0]*zetaYb+vxc.df0111*trZb[i][0]*zetaYa+
416 vxc.df1002*trZa[i][0]*zetaYb+vxc.df1011*trZa[i][0]*zetaYa;
417 /* FZZG part: not implemented in Dalton */
418 /* FZRR part: */
419 fza2f += vxc.df2010*trYa[i][0]*trZa[i][0]+vxc.df1110*trYa[i][0]*trZb[i][0]+
420 vxc.df1110*trZa[i][0]*trYb[i][0]+vxc.df0210*trZb[i][0]*trYb[i][0];
421 fzb2f += vxc.df0201*trYb[i][0]*trZb[i][0]+vxc.df1101*trYa[i][0]*trZb[i][0]+
422 vxc.df1101*trZa[i][0]*trYb[i][0]+vxc.df2001*trYa[i][0]*trZa[i][0];
423 /* FZRG part: not implemented in Dalton */
424 /* FZZ part: */
425 fza2f += vxc.df0020*zetaYZZYa+vxc.df0011*zetaYZZYb;
426 fzb2f += vxc.df0011*zetaYZZYa+vxc.df0002*zetaYZZYb;
427 fza2f += vxc.df0020*trYZaa/dp.grada+vxc.df0011*trYZbb/dp.gradb;
428 fzb2f += vxc.df0002*trYZbb/dp.gradb+vxc.df0011*trYZaa/dp.grada;
429 // Need additional testting summed in closed shell case with other terms !!!!
430 fza2f += -(vxc.df0020*zetaYa*zetaZa/dp.grada+vxc.df0011*zetaYb*zetaZb/dp.gradb);
431 fzb2f += -(vxc.df0011*zetaYa*zetaZa/dp.grada+vxc.df0002*zetaYb*zetaZb/dp.gradb);
432 /* FZR part: */
433 fza2f += vxc.df1010*trYZZYa[i][0]+vxc.df0110*trYZZYb[i][0];
434 fzb2f += vxc.df1001*trYZZYa[i][0]+vxc.df0101*trYZZYb[i][0];
435 /* FZG part: not implemented in Dalton */
436 /* Scale contribution with density gradient operator prefactor */
437 fza2f *= 1.0/dp.grada; fzb2f *= 1.0/dp.gradb;
438 /* distribute this contribution: */
439 pref3a[i][1] = fza2f*grid->g.rad.a[i][0];
440 pref3a[i][2] = fza2f*grid->g.rad.a[i][1];
441 pref3a[i][3] = fza2f*grid->g.rad.a[i][2];
442 pref3b[i][1] = fzb2f*grid->g.rad.b[i][0];
443 pref3b[i][2] = fzb2f*grid->g.rad.b[i][1];
444 pref3b[i][3] = fzb2f*grid->g.rad.b[i][2];
445 /*********************************************************************************/
446 /*** Mixed density gradient operator times 2-th order variation of functional ****/
447 /*********************************************************************************/
448 /* FGGG part: not implemented in Dalton */
449 /* FGGZ part: not implemented in Dalton */
450 /* FGGR part: not implemented in Dalton */
451 /* FGZZ part: not implemented in Dalton */
452 /* FGRR part: */
453 fg2f = vxc.df20001*trYa[i][0]*trZa[i][0]+vxc.df11001*trYa[i][0]*trZb[i][0]+
454 vxc.df11001*trYb[i][0]*trZa[i][0]+vxc.df02001*trYb[i][0]*trZb[i][0];
455 /* FGG part: not implemented in Dalton */
456 /* FGZ part: not implemented in Dalton */
457 /* FGR part: */
458 fg2f += vxc.df10001*trYZZYa[i][0]+vxc.df01001*trYZZYb[i][0];
459 /* distribute this contribution: */
460 pref3a[i][1] += fg2f*grid->g.rad.b[i][0];
461 pref3a[i][2] += fg2f*grid->g.rad.b[i][1];
462 pref3a[i][3] += fg2f*grid->g.rad.b[i][2];
463 pref3b[i][1] += fg2f*grid->g.rad.a[i][0];
464 pref3b[i][2] += fg2f*grid->g.rad.a[i][1];
465 pref3b[i][3] += fg2f*grid->g.rad.a[i][2];
466 /**********************************************************************************/
467 /*** Density gradient operator variation (Z) times variation of functional (Y) ****/
468 /**********************************************************************************/
469 /* FZZ part: */
470 fza1fy = vxc.df0020*zetaYa+vxc.df0011*zetaYb;
471 fzb1fy = vxc.df0002*zetaYb+vxc.df0011*zetaYa;
472 /* FZR part: */
473 fza1fy += vxc.df1010*trYa[i][0]+vxc.df0110*trYb[i][0];
474 fzb1fy += vxc.df1001*trYa[i][0]+vxc.df0101*trYb[i][0];
475 /* Scale contribution with density gradient operator prefactor */
476 fza1fy *= 1.0/dp.grada; fzb1fy *= 1.0/dp.gradb;
477 /* distribute this contribution: */
478 pref3a[i][1] += fza1fy*trZa[i][1];
479 pref3a[i][2] += fza1fy*trZa[i][2];
480 pref3a[i][3] += fza1fy*trZa[i][3];
481 pref3b[i][1] += fzb1fy*trZb[i][1];
482 pref3b[i][2] += fzb1fy*trZb[i][2];
483 pref3b[i][3] += fzb1fy*trZb[i][3];
484 /* Second part: scale this contribution */
485 fza1fy *= -zetaZa/dp.grada;
486 fzb1fy *= -zetaZb/dp.gradb;
487 /* distribute this contribution */
488 pref3a[i][1] += fza1fy*grid->g.rad.a[i][0];
489 pref3a[i][2] += fza1fy*grid->g.rad.a[i][1];
490 pref3a[i][3] += fza1fy*grid->g.rad.a[i][2];
491 pref3b[i][1] += fzb1fy*grid->g.rad.b[i][0];
492 pref3b[i][2] += fzb1fy*grid->g.rad.b[i][1];
493 pref3b[i][3] += fzb1fy*grid->g.rad.b[i][2];
494 /**********************************************************************************/
495 /*** Density gradient operator variation (Y) times variation of functional (Z) ****/
496 /**********************************************************************************/
497 /* FZZ part: */
498 fza1fz = vxc.df0020*zetaZa+vxc.df0011*zetaZb;
499 fzb1fz = vxc.df0002*zetaZb+vxc.df0011*zetaZa;
500 /* FZR part: */
501 fza1fz += vxc.df1010*trZa[i][0]+vxc.df0110*trZb[i][0];
502 fzb1fz += vxc.df1001*trZa[i][0]+vxc.df0101*trZb[i][0];
503 /* Scale contribution with density gradient operator prefactor */
504 fza1fz *= 1.0/dp.grada; fzb1fz *= 1.0/dp.gradb;
505 /* distribute this contribution: */
506 pref3a[i][1] += fza1fz*trYa[i][1];
507 pref3a[i][2] += fza1fz*trYa[i][2];
508 pref3a[i][3] += fza1fz*trYa[i][3];
509 pref3b[i][1] += fzb1fz*trYb[i][1];
510 pref3b[i][2] += fzb1fz*trYb[i][2];
511 pref3b[i][3] += fzb1fz*trYb[i][3];
512 /* Second part: scale this contribution */
513 fza1fz *= -zetaYa/dp.grada;
514 fzb1fz *= -zetaYb/dp.gradb;
515 /* distribute this contribution */
516 pref3a[i][1] += fza1fz*grid->g.rad.a[i][0];
517 pref3a[i][2] += fza1fz*grid->g.rad.a[i][1];
518 pref3a[i][3] += fza1fz*grid->g.rad.a[i][2];
519 pref3b[i][1] += fzb1fz*grid->g.rad.b[i][0];
520 pref3b[i][2] += fzb1fz*grid->g.rad.b[i][1];
521 pref3b[i][3] += fzb1fz*grid->g.rad.b[i][2];
522 /****************************************************************************************/
523 /*** Mixed density gradient operator variation (Z) times variation of functional (Y) ****/
524 /****************************************************************************************/
525 /* FGG not implemented in Dalton */
526 /* FGZ not implemented in Dalton */
527 /* FGR part: */
528 fg1fy = vxc.df10001*trYa[i][0]+vxc.df10001*trYb[i][0];
529 //fg1fy *= 2.0;
530 pref3a[i][1] += fg1fy*trZb[i][1];
531 pref3a[i][2] += fg1fy*trZb[i][2];
532 pref3a[i][3] += fg1fy*trZb[i][3];
533 pref3b[i][1] += fg1fy*trZa[i][1];
534 pref3b[i][2] += fg1fy*trZa[i][2];
535 pref3b[i][3] += fg1fy*trZa[i][3];
536 /****************************************************************************************/
537 /*** Mixed density gradient operator variation (Y) times variation of functional (Z) ****/
538 /****************************************************************************************/
539 /* FGG not implemented in Dalton */
540 /* FGZ not implemented in Dalton */
541 /* FGR part: */
542 fg1fz = vxc.df10001*trZa[i][0]+vxc.df10001*trZb[i][0];
543 pref3a[i][1] += fg1fz*trYb[i][1];
544 pref3a[i][2] += fg1fz*trYb[i][2];
545 pref3a[i][3] += fg1fz*trYb[i][3];
546 pref3b[i][1] += fg1fz*trYa[i][1];
547 pref3b[i][2] += fg1fz*trYa[i][2];
548 pref3b[i][3] += fg1fz*trYa[i][3];
549 /**************************************************************/
550 /*** Second order variation of density gradient operator ***/
551 /**************************************************************/
552 fza0f = vxc.df0010/dp.grada;
553 fzb0f = vxc.df0001/dp.gradb;
554 pref3a[i][1] += fza0f*trYZZYa[i][1];
555 pref3a[i][2] += fza0f*trYZZYa[i][2];
556 pref3a[i][3] += fza0f*trYZZYa[i][3];
557 pref3b[i][1] += fzb0f*trYZZYb[i][1];
558 pref3b[i][2] += fzb0f*trYZZYb[i][2];
559 pref3b[i][3] += fzb0f*trYZZYb[i][3];
560 /* second part of this contribution */
561 fza0f = -vxc.df0010*zetaYZZYa/(dp.grada*dp.grada);
562 fzb0f = -vxc.df0001*zetaYZZYb/(dp.gradb*dp.gradb);
563 pref3a[i][1] += fza0f*grid->g.rad.a[i][0];
564 pref3a[i][2] += fza0f*grid->g.rad.a[i][1];
565 pref3a[i][3] += fza0f*grid->g.rad.a[i][2];
566 pref3b[i][1] += fzb0f*grid->g.rad.b[i][0];
567 pref3b[i][2] += fzb0f*grid->g.rad.b[i][1];
568 pref3b[i][3] += fzb0f*grid->g.rad.b[i][2];
569 /* third part of this contribution */
570 fza0f = -vxc.df0010*trYZaa/(dp.grada*dp.grada*dp.grada);
571 fzb0f = -vxc.df0001*trYZbb/(dp.gradb*dp.gradb*dp.gradb);
572 pref3a[i][1] += fza0f*grid->g.rad.a[i][0];
573 pref3a[i][2] += fza0f*grid->g.rad.a[i][1];
574 pref3a[i][3] += fza0f*grid->g.rad.a[i][2];
575 pref3b[i][1] += fzb0f*grid->g.rad.b[i][0];
576 pref3b[i][2] += fzb0f*grid->g.rad.b[i][1];
577 pref3b[i][3] += fzb0f*grid->g.rad.b[i][2];
578 /* fourth part of this contribution */
579 fza0f = -vxc.df0010*zetaYa/(dp.grada*dp.grada);
580 fzb0f = -vxc.df0001*zetaYb/(dp.gradb*dp.gradb);
581 pref3a[i][1] += fza0f*trZa[i][1];
582 pref3a[i][2] += fza0f*trZa[i][2];
583 pref3a[i][3] += fza0f*trZa[i][3];
584 pref3b[i][1] += fzb0f*trZb[i][1];
585 pref3b[i][2] += fzb0f*trZb[i][2];
586 pref3b[i][3] += fzb0f*trZb[i][3];
587 /* fifth part of this contribution */
588 fza0f = -vxc.df0010*zetaZa/(dp.grada*dp.grada);
589 fzb0f = -vxc.df0001*zetaZb/(dp.gradb*dp.gradb);
590 pref3a[i][1] += fza0f*trYa[i][1];
591 pref3a[i][2] += fza0f*trYa[i][2];
592 pref3a[i][3] += fza0f*trYa[i][3];
593 pref3b[i][1] += fzb0f*trYb[i][1];
594 pref3b[i][2] += fzb0f*trYb[i][2];
595 pref3b[i][3] += fzb0f*trYb[i][3];
596 /* third part od this contribution with mixed perturbations */
597 fza0f = 3.0*vxc.df0010*zetaYa*zetaZa/(dp.grada*dp.grada*dp.grada);
598 fzb0f = 3.0*vxc.df0001*zetaYb*zetaZb/(dp.gradb*dp.gradb*dp.gradb);
599 pref3a[i][1] += fza0f*grid->g.rad.a[i][0];
600 pref3a[i][2] += fza0f*grid->g.rad.a[i][1];
601 pref3a[i][3] += fza0f*grid->g.rad.a[i][2];
602 pref3b[i][1] += fzb0f*grid->g.rad.b[i][0];
603 pref3b[i][2] += fzb0f*grid->g.rad.b[i][1];
604 pref3b[i][3] += fzb0f*grid->g.rad.b[i][2];
605 /********************************************************************/
606 /*** Second order variation of mixed density gradient operator ***/
607 /********************************************************************/
608 fg0f = vxc.df00001;
609 pref3a[i][1] += fg0f*trYZZYb[i][1];
610 pref3a[i][2] += fg0f*trYZZYb[i][2];
611 pref3a[i][3] += fg0f*trYZZYb[i][3];
612 pref3b[i][1] += fg0f*trYZZYa[i][1];
613 pref3b[i][2] += fg0f*trYZZYa[i][2];
614 pref3b[i][3] += fg0f*trYZZYa[i][3];
615 /*************************************/
616 /*** Second order (Y) contribution ***/
617 /*************************************/
618 prefb2a[i][0] = vxc.df2000*trYa[i][0]+vxc.df1100*trYb[i][0]+
619 vxc.df1010*zetaYa+vxc.df1001*zetaYb+
620 vxc.df10001*zetaYab;
621 prefb2b[i][0] = vxc.df0200*trYb[i][0]+vxc.df1100*trYa[i][0]+
622 vxc.df0110*zetaYa+vxc.df0101*zetaYb+
623 vxc.df01001*zetaYab;
624 prefb2a[i][0] *= 0.5; prefb2b[i][0] *= 0.5;
625 fa1y = (vxc.df0020*zetaYa+vxc.df0011*zetaYb+vxc.df1010*trYa[i][0]+
626 vxc.df0110*trYb[i][0])/dp.grada-vxc.df0010*zetaYa/(dp.grada*dp.grada);
627 fb1y = (vxc.df0002*zetaYb+vxc.df0011*zetaYa+vxc.df1001*trYa[i][0]+
628 vxc.df0101*trYb[i][0])/dp.gradb-vxc.df0001*zetaYb/(dp.gradb*dp.gradb);
629 prefb2a[i][1] = fa1y*grid->g.rad.a[i][0];
630 prefb2a[i][2] = fa1y*grid->g.rad.a[i][1];
631 prefb2a[i][3] = fa1y*grid->g.rad.a[i][2];
632 prefb2b[i][1] = fb1y*grid->g.rad.b[i][0];
633 prefb2b[i][2] = fb1y*grid->g.rad.b[i][1];
634 prefb2b[i][3] = fb1y*grid->g.rad.b[i][2];
635 fg1y = vxc.df10001*trYa[i][0]+vxc.df01001*trYb[i][0];
636 prefb2a[i][1] += fg1y*grid->g.rad.b[i][0];
637 prefb2a[i][2] += fg1y*grid->g.rad.b[i][1];
638 prefb2a[i][3] += fg1y*grid->g.rad.b[i][2];
639 prefb2b[i][1] += fg1y*grid->g.rad.a[i][0];
640 prefb2b[i][2] += fg1y*grid->g.rad.a[i][1];
641 prefb2b[i][3] += fg1y*grid->g.rad.a[i][2];
642 fa1y = vxc.df0010/dp.grada;
643 fb1y = vxc.df0001/dp.gradb;
644 prefb2a[i][1] += fa1y*trYa[i][1];
645 prefb2a[i][2] += fa1y*trYa[i][2];
646 prefb2a[i][3] += fa1y*trYa[i][3];
647 prefb2b[i][1] += fb1y*trYb[i][1];
648 prefb2b[i][2] += fb1y*trYb[i][2];
649 prefb2b[i][3] += fb1y*trYb[i][3];
650 fg1y = vxc.df00001;
651 prefb2a[i][1] += fg1y*trYb[i][1];
652 prefb2a[i][2] += fg1y*trYb[i][2];
653 prefb2a[i][3] += fg1y*trYb[i][3];
654 prefb2b[i][1] += fg1y*trYa[i][1];
655 prefb2b[i][2] += fg1y*trYa[i][2];
656 prefb2b[i][3] += fg1y*trYa[i][3];
657 /*************************************/
658 /*** Second order (Z) contribution ***/
659 /*************************************/
660 prefc2a[i][0] = vxc.df2000*trZa[i][0]+vxc.df1100*trZb[i][0]+
661 vxc.df1010*zetaZa+vxc.df1001*zetaZb+
662 vxc.df10001*zetaZab;
663 prefc2b[i][0] = vxc.df0200*trZb[i][0]+vxc.df1100*trZa[i][0]+
664 vxc.df0110*zetaZa+vxc.df0101*zetaZb+
665 vxc.df01001*zetaZab;
666 prefc2a[i][0] *= 0.5; prefc2b[i][0] *= 0.5;
667 fa1z = (vxc.df0020*zetaZa+vxc.df0011*zetaZb+vxc.df1010*trZa[i][0]+
668 vxc.df0110*trZb[i][0])/dp.grada-vxc.df0010*zetaZa/(dp.grada*dp.grada);
669 fb1z = (vxc.df0002*zetaZb+vxc.df0011*zetaZa+vxc.df1001*trZa[i][0]+
670 vxc.df0101*trZb[i][0])/dp.gradb-vxc.df0001*zetaZb/(dp.gradb*dp.gradb);
671 prefc2a[i][1] = fa1z*grid->g.rad.a[i][0];
672 prefc2a[i][2] = fa1z*grid->g.rad.a[i][1];
673 prefc2a[i][3] = fa1z*grid->g.rad.a[i][2];
674 prefc2b[i][1] = fb1z*grid->g.rad.b[i][0];
675 prefc2b[i][2] = fb1z*grid->g.rad.b[i][1];
676 prefc2b[i][3] = fb1z*grid->g.rad.b[i][2];
677 fg1z = vxc.df10001*trZa[i][0]+vxc.df01001*trZb[i][0];
678 prefc2a[i][1] += fg1z*grid->g.rad.b[i][0];
679 prefc2a[i][2] += fg1z*grid->g.rad.b[i][1];
680 prefc2a[i][3] += fg1z*grid->g.rad.b[i][2];
681 prefc2b[i][1] += fg1z*grid->g.rad.a[i][0];
682 prefc2b[i][2] += fg1z*grid->g.rad.a[i][1];
683 prefc2b[i][3] += fg1z*grid->g.rad.a[i][2];
684 fa1z = vxc.df0010/dp.grada;
685 fb1z = vxc.df0001/dp.gradb;
686 prefc2a[i][1] += fa1z*trZa[i][1];
687 prefc2a[i][2] += fa1z*trZa[i][2];
688 prefc2a[i][3] += fa1z*trZa[i][3];
689 prefc2b[i][1] += fb1z*trZb[i][1];
690 prefc2b[i][2] += fb1z*trZb[i][2];
691 prefc2b[i][3] += fb1z*trZb[i][3];
692 fg1z = vxc.df00001;
693 prefc2a[i][1] += fg1z*trZb[i][1];
694 prefc2a[i][2] += fg1z*trZb[i][2];
695 prefc2a[i][3] += fg1z*trZb[i][3];
696 prefc2b[i][1] += fg1z*trZa[i][1];
697 prefc2b[i][2] += fg1z*trZa[i][2];
698 prefc2b[i][3] += fg1z*trZa[i][3];
699 }
700 /* Compute VXC[3] contribution */
701 for(isym=0; isym<grid->nsym; isym++) {
702 integer (*RESTRICT iblocks)[2] = BASBLOCK(grid,isym);
703 integer ibl_cnt = grid->bas_bl_cnt[isym];
704 for(ibl=0; ibl<ibl_cnt; ibl++)
705 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
706 real * RESTRICT a0 = aos + i*bllen;
707 real * RESTRICT ax = aox + i*bllen;
708 real * RESTRICT ay = aoy + i*bllen;
709 real * RESTRICT az = aoz + i*bllen;
710 real * RESTRICT vyai = vya + i*bllen;
711 real * RESTRICT vybi = vyb + i*bllen;
712 real * RESTRICT vzai = vza + i*bllen;
713 real * RESTRICT vzbi = vzb + i*bllen;
714 real * RESTRICT tmpai = tmpa + i*bllen;
715 real * RESTRICT tmpbi = tmpb + i*bllen;
716 for(k=blstart; k<blend; k++) {
717 vzai[k] = -(a0[k]*prefb2a[k][0]+
718 ax[k]*prefb2a[k][1]+
719 ay[k]*prefb2a[k][2]+
720 az[k]*prefb2a[k][3]);
721 vzbi[k] = -(a0[k]*prefb2b[k][0]+
722 ax[k]*prefb2b[k][1]+
723 ay[k]*prefb2b[k][2]+
724 az[k]*prefb2b[k][3]);
725 vyai[k] = -(a0[k]*prefc2a[k][0]+
726 ax[k]*prefc2a[k][1]+
727 ay[k]*prefc2a[k][2]+
728 az[k]*prefc2a[k][3]);
729 vybi[k] = -(a0[k]*prefc2b[k][0]+
730 ax[k]*prefc2b[k][1]+
731 ay[k]*prefc2b[k][2]+
732 az[k]*prefc2b[k][3]);
733 tmpai[k] = a0[k]*pref3a[k][0]+
734 ax[k]*pref3a[k][1]+
735 ay[k]*pref3a[k][2]+
736 az[k]*pref3a[k][3];
737 tmpbi[k] = a0[k]*pref3b[k][0]+
738 ax[k]*pref3b[k][1]+
739 ay[k]*pref3b[k][2]+
740 az[k]*pref3b[k][3];
741 }
742 }
743 /* distribute VXC[3] contributions */
744 for(ibl=0; ibl<ibl_cnt; ibl++) {
745 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
746 integer ioff = i*inforb_.nbast;
747 real suma;
748 real sumb;
749 real * RESTRICT tmpai = tmpa + i*bllen;
750 real * RESTRICT tmpbi = tmpb + i*bllen;
751 real * RESTRICT vyai = vya + i*bllen;
752 real * RESTRICT vybi = vyb + i*bllen;
753 real * RESTRICT vzai = vza + i*bllen;
754 real * RESTRICT vzbi = vzb + i*bllen;
755 integer jsym = inforb_.muld2h[data->symYZ-1][isym]-1;
756 integer (*RESTRICT jblocks)[2] = BASBLOCK(grid,jsym);
757 integer jbl_cnt = grid->bas_bl_cnt[jsym];
758 for(jbl=0; jbl<jbl_cnt; jbl++) {
759 for(j=jblocks[jbl][0]-1; j<jblocks[jbl][1]; j++) {
760 real * RESTRICT aosj = aos + j*bllen;
761 suma = 0; sumb = 0;
762 for(k=blstart; k<blend; k++) {
763 suma += aosj[k]*tmpai[k];
764 sumb += aosj[k]*tmpbi[k];
765 }
766 om_a[j+ioff] += suma;
767 om_b[j+ioff] += sumb;
768 }
769 }
770 jsym = inforb_.muld2h[data->symZ-1][isym]-1;
771 jblocks = BASBLOCK(grid,jsym);
772 jbl_cnt = grid->bas_bl_cnt[jsym];
773 for(jbl=0; jbl<jbl_cnt; jbl++) {
774 for(j=jblocks[jbl][0]-1; j<jblocks[jbl][1]; j++) {
775 real * RESTRICT aosj = aos + j*bllen;
776 suma = 0; sumb = 0;
777 for(k=blstart; k<blend; k++) {
778 suma += aosj[k]*vyai[k];
779 sumb += aosj[k]*vybi[k];
780 }
781 omYa[j+ioff] += suma;
782 omYb[j+ioff] += sumb;
783 }
784 }
785 jsym = inforb_.muld2h[data->symY-1][isym]-1;
786 jblocks = BASBLOCK(grid,jsym);
787 jbl_cnt = grid->bas_bl_cnt[jsym];
788 for(jbl=0; jbl<jbl_cnt; jbl++) {
789 for(j=jblocks[jbl][0]-1; j<jblocks[jbl][1]; j++) {
790 real * RESTRICT aosj = aos + j*bllen;
791 suma = 0; sumb =0;
792 for(k=blstart; k<blend; k++) {
793 suma += aosj[k]*vzai[k];
794 sumb += aosj[k]*vzbi[k];
795 }
796 omZa[j+ioff] += suma;
797 omZb[j+ioff] += sumb;
798 }
799 }
800 }
801 }
802 } /*Loop over symmetries */
803 }
804
805 Quad_Open_Data
quad_open_init(real * cmo,real * kappaY,real * kappaZ,integer * symY,integer * symZ,integer * spinY,integer * spinZ)806 quad_open_init(real *cmo, real *kappaY, real *kappaZ,
807 integer *symY, integer *symZ, integer *spinY, integer *spinZ)
808 {
809 Quad_Open_Data data;
810
811 /* Initialize repsonse data */
812 /* FIX ME: change for dal_maloc after testing! */
813 data.dmata = calloc(2*inforb_.n2basx,sizeof(real));
814 data.dmatb = data.dmata+inforb_.n2basx;
815 data.kappaY = kappaY;
816 data.kappaZ = kappaZ;
817 data.symY = *symY;
818 data.symZ = *symZ;
819 data.symYZ = inforb_.muld2h[data.symY-1][data.symZ-1];
820 data.spinY = *spinY;
821 data.spinZ = *spinZ;
822
823 /* Allocate one-index transformed densities */
824 data.rhoa_y = calloc(inforb_.n2basx,sizeof(real));
825 data.rhob_y = calloc(inforb_.n2basx,sizeof(real));
826 data.rhoa_z = calloc(inforb_.n2basx,sizeof(real));
827 data.rhob_z = calloc(inforb_.n2basx,sizeof(real));
828 /* Allocate two-index transformed densities */
829 data.rhoa_yzzy = calloc(inforb_.n2basx,sizeof(real));
830 data.rhob_yzzy = calloc(inforb_.n2basx,sizeof(real));
831 /* Allocate one-index transfromed densities over grid batch */
832 if(selected_func->is_gga()) {
833 data.rhowa_y = malloc(4*DFT_BLLEN*sizeof(real));
834 data.rhowb_y = malloc(4*DFT_BLLEN*sizeof(real));
835 data.rhowa_z = malloc(4*DFT_BLLEN*sizeof(real));
836 data.rhowb_z = malloc(4*DFT_BLLEN*sizeof(real));
837 data.rhowa_yzzy = malloc(4*DFT_BLLEN*sizeof(real));
838 data.rhowb_yzzy = malloc(4*DFT_BLLEN*sizeof(real));
839 data.vya = malloc(4*DFT_BLLEN*inforb_.nbast*sizeof(real));
840 data.vyb = malloc(4*DFT_BLLEN*inforb_.nbast*sizeof(real));
841 data.vza = malloc(4*DFT_BLLEN*inforb_.nbast*sizeof(real));
842 data.vzb = malloc(4*DFT_BLLEN*inforb_.nbast*sizeof(real));
843 } else {
844 data.rhowa_y = malloc(DFT_BLLEN*sizeof(real));
845 data.rhowb_y = malloc(DFT_BLLEN*sizeof(real));
846 data.rhowa_z = malloc(DFT_BLLEN*sizeof(real));
847 data.rhowb_z = malloc(DFT_BLLEN*sizeof(real));
848 data.rhowa_yzzy = malloc(DFT_BLLEN*sizeof(real));
849 data.rhowb_yzzy = malloc(DFT_BLLEN*sizeof(real));
850 data.vya = malloc(DFT_BLLEN*inforb_.nbast*sizeof(real));
851 data.vyb = malloc(DFT_BLLEN*inforb_.nbast*sizeof(real));
852 data.vza = malloc(DFT_BLLEN*inforb_.nbast*sizeof(real));
853 data.vzb = malloc(DFT_BLLEN*inforb_.nbast*sizeof(real));
854 }
855 /* Allocate VX[3] contr. components AO basis */
856 data.res_omega_a = calloc(inforb_.n2basx,sizeof(real));
857 data.res_omega_b = calloc(inforb_.n2basx,sizeof(real));
858 data.res_omY_a = calloc(inforb_.n2basx,sizeof(real));
859 data.res_omY_b = calloc(inforb_.n2basx,sizeof(real));
860 data.res_omZ_a = calloc(inforb_.n2basx,sizeof(real));
861 data.res_omZ_b = calloc(inforb_.n2basx,sizeof(real));
862 /* Alocate temp. var. for integration */
863 if(selected_func->is_gga()) {
864 data.tmpa = calloc(4*DFT_BLLEN*inforb_.nbast,sizeof(real));
865 data.tmpb = calloc(4*DFT_BLLEN*inforb_.nbast,sizeof(real));
866 } else {
867 data.tmpa = calloc(DFT_BLLEN*inforb_.nbast,sizeof(real));
868 data.tmpb = calloc(DFT_BLLEN*inforb_.nbast,sizeof(real));
869 }
870 /* Prefactors */
871 if(selected_func->is_gga()) {
872 data.pref3a = calloc(4*DFT_BLLEN,sizeof(real));
873 data.pref3b = calloc(4*DFT_BLLEN,sizeof(real));
874 data.prefb2a = calloc(4*DFT_BLLEN,sizeof(real));
875 data.prefb2b = calloc(4*DFT_BLLEN,sizeof(real));
876 data.prefc2a = calloc(4*DFT_BLLEN,sizeof(real));
877 data.prefc2b = calloc(4*DFT_BLLEN,sizeof(real));
878 } else {
879 data.pref3a = calloc(DFT_BLLEN,sizeof(real));
880 data.pref3b = calloc(DFT_BLLEN,sizeof(real));
881 data.prefb2a = calloc(DFT_BLLEN,sizeof(real));
882 data.prefb2b = calloc(DFT_BLLEN,sizeof(real));
883 data.prefc2a = calloc(DFT_BLLEN,sizeof(real));
884 data.prefc2b = calloc(DFT_BLLEN,sizeof(real));
885 }
886 return data;
887 }
888
889 static void
quad_open_free(Quad_Open_Data data)890 quad_open_free(Quad_Open_Data data)
891 {
892 /* Free memory */
893 free(data.dmata);
894 free(data.rhoa_y);
895 free(data.rhob_y);
896 free(data.rhoa_z);
897 free(data.rhob_z);
898 free(data.rhoa_yzzy);
899 free(data.rhob_yzzy);
900 free(data.rhowa_y);
901 free(data.rhowb_y);
902 free(data.rhowa_z);
903 free(data.rhowb_z);
904 free(data.rhowa_yzzy);
905 free(data.rhowb_yzzy);
906 free(data.vya);
907 free(data.vyb);
908 free(data.vza);
909 free(data.vzb);
910 free(data.res_omega_a);
911 free(data.res_omega_b);
912 free(data.res_omY_a);
913 free(data.res_omY_b);
914 free(data.res_omZ_a);
915 free(data.res_omZ_b);
916 free(data.tmpa);
917 free(data.tmpb);
918 free(data.pref3a);
919 free(data.pref3b);
920 free(data.prefb2a);
921 free(data.prefb2b);
922 free(data.prefc2a);
923 free(data.prefc2b);
924 }
925
926 static void
commute_den_x(integer den,real * RESTRICT kappaY,integer symY,real * RESTRICT com_mat)927 commute_den_x(integer den, real * RESTRICT kappaY,
928 integer symY, real * RESTRICT com_mat)
929 {
930 integer isym, i, j;
931 integer norbt = inforb_.norbt;
932 integer iocc, jocc;
933
934 real * res, * ky;
935 /* if den 1 alpha, else beta density */
936 for(isym=0; isym<inforb_.nsym; isym++) {
937 /* the block in question corresponds to (isym,jsym) block */
938 integer istart= inforb_.iorb[isym];
939 integer iorb = inforb_.norb[isym];
940 integer jsym = inforb_.muld2h[symY-1][isym]-1;
941 integer jstart= inforb_.iorb[jsym];
942 integer jorb = inforb_.norb[jsym];
943 if (den) {
944 iocc = inforb_.nocc[isym];
945 jocc = inforb_.nocc[jsym];
946 } else {
947 iocc = inforb_.nish[isym];
948 jocc = inforb_.nish[jsym];
949 }
950 res = com_mat + istart + jstart*norbt;
951 ky = kappaY + istart + jstart*norbt;
952 /* occupied columns */
953 for(j=0; j<jocc; j++) {
954 for(i=0; i<iocc; i++) res[i+j*norbt] = 0;
955 for(i=iocc; i<iorb; i++) res[i+j*norbt] = -ky[i+j*norbt];
956 }
957 /* virtual columns */
958 for(j=jocc; j<jorb; j++) {
959 for(i=0; i<iocc; i++) res[i+j*norbt] = ky[i+j*norbt];
960 for(i=iocc; i<iorb; i++) res[i+j*norbt] = 0;
961 }
962 }
963 }
964
965
966 static __inline__ void
commute_matrices(integer sz,const real * a,const real * b,real * c,integer addp)967 commute_matrices(integer sz, const real* a, const real* b,
968 real* c, integer addp)
969 {
970 static const real MONER = -1.0;
971 const real* firstpref = addp ? &ONER : &ZEROR;
972 /* this could be optimized to use symmetry... */
973 dgemm_("N", "N", &sz, &sz, &sz, &ONER,
974 a, &sz, b, &sz, firstpref, c, &sz);
975 dgemm_("N", "N", &sz, &sz, &sz, &MONER,
976 b, &sz, a, &sz, &ONER, c, &sz);
977 }
978
979
980 static void
transform_mat(real * cmo,real * matmo,integer symYZ,real * matao)981 transform_mat(real *cmo, real *matmo, integer symYZ, real *matao)
982 {
983 static const real HALFR = 0.5;
984 real * tmp;
985 integer isym, norbt, nbast;
986
987 norbt = inforb_.norbt;
988 nbast = inforb_.nbast;
989 tmp = calloc(norbt*nbast,sizeof(real));
990 for(isym=0; isym<inforb_.nsym; isym++) {
991 integer ibasi = inforb_.ibas[isym];
992 integer nbasi = inforb_.nbas[isym];
993 integer iorbi = inforb_.iorb[isym];
994 integer norbi = inforb_.norb[isym];
995 integer icmoi = inforb_.icmo[isym];
996 integer jsym = inforb_.muld2h[symYZ-1][isym]-1;
997 integer ibasj = inforb_.ibas[jsym];
998 integer nbasj = inforb_.nbas[jsym];
999 integer iorbj = inforb_.iorb[jsym];
1000 integer norbj = inforb_.norb[jsym];
1001 integer icmoj = inforb_.icmo[jsym];
1002 if(norbi == 0 || norbj == 0) continue;
1003 dgemm_("N","N", &nbasi, &norbj, &norbi,
1004 &HALFR, cmo + icmoi, &nbasi, matmo + iorbi + iorbj*norbt, &norbt,
1005 &ZEROR, tmp, &nbasi);
1006 dgemm_("N","T", &nbasi, &nbasj, &norbj,
1007 &ONER, tmp, &nbasi, cmo + icmoj, &nbasj,
1008 &ZEROR, matao + ibasi + ibasj*nbast, &nbast);
1009 }
1010 free(tmp);
1011 }
1012
1013 /***********************************************************************/
1014 /* Quadratic Response for High Spin Open-Shell */
1015 /* Z. Rinkevicius */
1016 /* =================================================================== */
1017 void
FSYM2(dft_qr_ab)1018 FSYM2(dft_qr_ab)(real * fi, real * fo, real *cmo,
1019 real *kappaY, integer *symY, integer *spinY,
1020 real *kappaZ, integer *symZ, integer *spinZ,
1021 integer *addfock, real *work, integer *lwork, integer *iprint)
1022 {
1023 static const real DP5R = 0.5;
1024 static const real MDP5R = -0.5;
1025 static const real ONER = 1.0;
1026 integer i, j, isymsav;
1027 real * dv;
1028 struct tms starttm, endtm; clock_t utm;
1029 Quad_Open_Data qr_data;
1030 real electrons, exp_el = 2.0*inforb_.nisht+inforb_.nasht;
1031 real *tmp, *tmpa, *tmpb, *kappa_yz;
1032 times(&starttm);
1033
1034 qr_data = quad_open_init(cmo,kappaY,kappaZ,symY,symZ,spinY,spinZ);
1035
1036 /* Get Density in AO basis */
1037 FSYM2(dft_get_ao_dens_matab)(cmo,qr_data.dmatb,qr_data.dmata,work,lwork);
1038 FSYM(daxpy)(&inforb_.n2basx,&DP5R,qr_data.dmatb,&ONEI,qr_data.dmata,&ONEI);
1039 FSYM(dscal)(&inforb_.n2basx,&DP5R,qr_data.dmatb,&ONEI);
1040 /* Get active density matrix in MO basis*/
1041 dv=dal_malloc(inforb_.n2ashx*sizeof(real));
1042 FSYM(dunit)(dv,&inforb_.nasht);
1043 /* Compute one index transformed densities */
1044 isymsav = FSYM(isetksymop)(symY);
1045 FSYM(deq27)(cmo,kappaY,dv,qr_data.rhob_y,qr_data.rhoa_y,work,lwork);
1046 dscal_(&inforb_.n2basx,&DP5R,qr_data.rhob_y,&ONEI);
1047 FSYM(daxpy)(&inforb_.n2basx,&ONER,qr_data.rhob_y,&ONEI,qr_data.rhoa_y,&ONEI);
1048 FSYM(isetksymop)(symZ);
1049 FSYM(deq27)(cmo,kappaZ,dv,qr_data.rhob_z,qr_data.rhoa_z,work,lwork);
1050 dscal_(&inforb_.n2basx,&DP5R,qr_data.rhob_z,&ONEI);
1051 FSYM(daxpy)(&inforb_.n2basx,&ONER,qr_data.rhob_z,&ONEI,qr_data.rhoa_z,&ONEI);
1052 FSYM(isetksymop)(&isymsav);
1053 free(dv);
1054 /* Compute two-index transformed densities */
1055 tmp = calloc(inforb_.n2orbx,sizeof(real));
1056 kappa_yz = calloc(inforb_.n2basx,sizeof(real));
1057 commute_den_x(1,kappaY,*symY,tmp);
1058 commute_matrices(inforb_.norbt,tmp,kappaZ,kappa_yz,0);
1059 FSYM(dzero)(tmp,&inforb_.n2orbx);
1060 commute_den_x(1,kappaZ,*symZ,tmp);
1061 commute_matrices(inforb_.norbt,tmp,kappaY,kappa_yz,1);
1062 transform_mat(cmo,kappa_yz,qr_data.symYZ,qr_data.rhoa_yzzy);
1063 FSYM(dzero)(tmp,&inforb_.n2orbx);
1064 FSYM(dzero)(kappa_yz,&inforb_.n2orbx);
1065 commute_den_x(0,kappaY,*symY,tmp);
1066 commute_matrices(inforb_.norbt,tmp,kappaZ,kappa_yz,0);
1067 FSYM(dzero)(tmp,&inforb_.n2orbx);
1068 commute_den_x(0,kappaZ,*symZ,tmp);
1069 commute_matrices(inforb_.norbt,tmp,kappaY,kappa_yz,1);
1070 transform_mat(cmo,kappa_yz,qr_data.symYZ,qr_data.rhob_yzzy);
1071 free(tmp);
1072 free(kappa_yz);
1073 /* Integrate VXC[3] */
1074 electrons = dft_integrate_ao_bl( 2, qr_data.dmata, work, lwork, iprint, 0,
1075 (DftBlockCallback)(selected_func->is_gga() ?
1076 quad_open_gga_cb:quad_open_lda_cb),
1077 &qr_data);
1078
1079 /* Symmetrize integrated contributions */
1080 for(i=0; i<inforb_.nbast; i++) {
1081 for(j=0; j<i; j++) {
1082 integer ji = j + i*inforb_.nbast;
1083 integer ij = i + j*inforb_.nbast;
1084 real avg = 0.5*(qr_data.res_omega_a[ij]+qr_data.res_omega_a[ji]);
1085 qr_data.res_omega_a[ij] = qr_data.res_omega_a[ji] = avg;
1086 avg = 0.5*(qr_data.res_omega_b[ij]+qr_data.res_omega_b[ji]);
1087 qr_data.res_omega_b[ij] = qr_data.res_omega_b[ji] = avg;
1088 avg = 0.5*(qr_data.res_omY_a[ij]+qr_data.res_omY_a[ji]);
1089 qr_data.res_omY_a[ij] = qr_data.res_omY_a[ji] = avg;
1090 avg = 0.5*(qr_data.res_omY_b[ij]+qr_data.res_omY_b[ji]);
1091 qr_data.res_omY_b[ij] = qr_data.res_omY_b[ji] = avg;
1092 avg = 0.5*(qr_data.res_omZ_a[ij]+qr_data.res_omZ_a[ji]);
1093 qr_data.res_omZ_a[ij] = qr_data.res_omZ_a[ji] = avg;
1094 avg = 0.5*(qr_data.res_omZ_b[ij]+qr_data.res_omZ_b[ji]);
1095 qr_data.res_omZ_b[ij] = qr_data.res_omZ_b[ji] = avg;
1096 }
1097 }
1098 /* Transform contrbutions to MO basis and add them */
1099 tmpa = calloc(inforb_.n2orbx,sizeof(real));
1100 tmpb = calloc(inforb_.n2orbx,sizeof(real));
1101 FSYM(lrao2mo)(cmo, &qr_data.symYZ, qr_data.res_omega_a, tmpa, work, lwork);
1102 FSYM(lrao2mo)(cmo, &qr_data.symYZ, qr_data.res_omega_b, tmpb, work, lwork);
1103 FSYM(daxpy)(&inforb_.n2orbx, &ONER, tmpa, &ONEI, fi, &ONEI);
1104 FSYM(daxpy)(&inforb_.n2orbx, &MDP5R, tmpa, &ONEI, fo, &ONEI);
1105 FSYM(daxpy)(&inforb_.n2orbx, &DP5R, tmpb, &ONEI, fo, &ONEI);
1106 /* [ky,Vxc(Z)] */
1107 FSYM(dzero)(tmpa, &inforb_.n2orbx);
1108 FSYM(dzero)(tmpb, &inforb_.n2orbx);
1109 FSYM(lrao2mo)(cmo, &qr_data.symZ, qr_data.res_omY_a, tmpa, work, lwork);
1110 commute_matrices(inforb_.norbt, tmpa, kappaY, tmpb, 1);
1111 FSYM(daxpy)(&inforb_.n2orbx, &ONER, tmpb, &ONEI, fi, &ONEI);
1112 FSYM(daxpy)(&inforb_.n2orbx, &MDP5R, tmpb, &ONEI, fo, &ONEI);
1113 FSYM(dzero)(tmpa, &inforb_.n2orbx);
1114 FSYM(dzero)(tmpb, &inforb_.n2orbx);
1115 FSYM(lrao2mo)(cmo, &qr_data.symZ, qr_data.res_omY_b, tmpa, work, lwork);
1116 commute_matrices(inforb_.norbt, tmpa, kappaY, tmpb, 1);
1117 FSYM(daxpy)(&inforb_.n2orbx, &DP5R, tmpb, &ONEI, fo, &ONEI);
1118 /* [kz,Vxc(Y)] */
1119 FSYM(dzero)(tmpa, &inforb_.n2orbx);
1120 FSYM(dzero)(tmpb, &inforb_.n2orbx);
1121 FSYM(lrao2mo)(cmo, &qr_data.symY, qr_data.res_omZ_a, tmpa, work, lwork);
1122 commute_matrices(inforb_.norbt, tmpa, kappaZ, tmpb, 1);
1123 FSYM(daxpy)(&inforb_.n2orbx, &ONER, tmpb, &ONEI, fi, &ONEI);
1124 FSYM(daxpy)(&inforb_.n2orbx, &MDP5R, tmpb, &ONEI, fo, &ONEI);
1125 FSYM(dzero)(tmpa, &inforb_.n2orbx);
1126 FSYM(dzero)(tmpb, &inforb_.n2orbx);
1127 FSYM(lrao2mo)(cmo, &qr_data.symY, qr_data.res_omZ_b, tmpa, work, lwork);
1128 commute_matrices(inforb_.norbt, tmpa, kappaZ, tmpb, 1);
1129 FSYM(daxpy)(&inforb_.n2orbx, &DP5R, tmpb, &ONEI, fo, &ONEI);
1130 /* clean up memory */
1131 free(tmpa);
1132 free(tmpb);
1133 quad_open_free(qr_data);
1134 /* print time and exit */
1135 if (*iprint>0) {
1136 times(&endtm);
1137 utm = endtm.tms_utime-starttm.tms_utime;
1138 fort_print("Electrons: %f(%9.3f): QR-DFT-AB evaluation time: %9.1f s",
1139 electrons,exp_el, utm/(double)sysconf(_SC_CLK_TCK));
1140 }
1141 }
1142