1 /*
2
3
4 !
5 ! Dalton, a molecular electronic structure program
6 ! Copyright (C) by the authors of Dalton.
7 !
8 ! This program is free software; you can redistribute it and/or
9 ! modify it under the terms of the GNU Lesser General Public
10 ! License version 2.1 as published by the Free Software Foundation.
11 !
12 ! This program is distributed in the hope that it will be useful,
13 ! but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ! Lesser General Public License for more details.
16 !
17 ! If a copy of the GNU LGPL v2.1 was not distributed with this
18 ! code, you can obtain one at https://www.gnu.org/licenses/old-licenses/lgpl-2.1.en.html.
19 !
20
21 !
22
23 */
24 /*-*-mode: C; c-indentation-style: "bsd"; c-basic-offset: 4; -*-*/
25 /* The DFT Propery evaluators.
26 (c) Pawel Salek, pawsa@theochem.kth.se.
27 2002.04.05
28
29 This file is written in C because it is easier to write portable
30 programs in C than in Fortran due to more strict syntax checking and
31 existing language standard that is actually obeyed by compiler writers
32 (not to mention language features).
33
34 This module evaluates DFT contribution to different properties,
35 in particular:
36 a). DFT contribution to the Fock/KS matrix (dft_kohn_sham)
37 b). DFT contribution to the linear response (dft_lin_resp)
38 c). DFT contribution to the molecular gradient (dft_mol_grad)
39 */
40 /* strictly conform to XOPEN ANSI C standard */
41 #define _XOPEN_SOURCE 500
42 #define _XOPEN_SOURCE_EXTENDED 1
43
44 #include <assert.h>
45 #include <math.h>
46 #include <stdio.h>
47 #include <stdlib.h>
48 #include <time.h>
49 #include <sys/times.h>
50 #include <unistd.h>
51
52 #define __CVERSION__
53 #include "general.h"
54 #include "integrator.h"
55 #include "functionals.h"
56
57 #include "inforb.h"
58
59 const static integer KOHNSH_DEBUG = 0;
60 const static integer DFTLR_DEBUG = 0;
61 const static integer DFTMAG_DEBUG = 0;
62
63 #if defined(VAR_MPI)
64 #include <mpi.h>
65 #include <our_extra_mpi.h>
66 #define MASTER_NO 0
67 #endif
68 #if 0 && defined(VAR_MPI)
69
70 /* dft_kohn_sham_slave:
71 this is a slave driver. It's task is to allocate memory needed by
72 the main property evaluator (dft_kohn_sham in this case) and call it.
73 */
74 void
75 dft_kohn_sham_slave(real* work, integer* lwork, integer* iprint)
76 {
77 real* dmat = malloc(inforb_.n2basx*sizeof(real));
78 real* ksm = calloc(inforb_.n2basx,sizeof(real));
79 integer iprint = 0;
80 dft_kohn_sham_(dmat, ksm, work, lwork, &iprint);
81 free(dmat);
82 free(ksm);
83 }
84
85 static __inline__ void
86 dft_kohn_sham_sync_slaves(real* dmat)
87 {
88 MPI_Bcast(dmat, inforb_.n2basx,MPI_DOUBLE,
89 MASTER_NO, MPI_COMM_WORLD);
90 }
91
92 static __inline__ void
93 dft_kohn_sham_collect_info(real*ksm, real* energy, real* work, integer lwork)
94 {
95 real tmp = *energy;
96 integer sz = 0;
97 MPI_Comm_size(MPI_COMM_WORLD, &sz);
98 if(sz <=1) return;
99 CHECK_WRKMEM(inforb_.n2basx, lwork);
100 dcopy_(&inforb_.n2basx, ksm,&ONEI, work, &ONEI);
101 MPI_Reduce(work, ksm, inforb_.n2basx, MPI_DOUBLE, MPI_SUM,
102 MASTER_NO, MPI_COMM_WORLD);
103 MPI_Reduce(&tmp, energy, 1, MPI_DOUBLE, MPI_SUM,
104 MASTER_NO, MPI_COMM_WORLD);
105 }
106
107 #else /* VAR_MPI */
108 #define dft_kohn_sham_sync_slaves(dmat)
109 #define dft_kohn_sham_collect_info(myksm, ksm, energy,lwork)
110 #endif /* VAR_MPI */
111
112 static real energy = 0.0;
113
114 /* ------------------------------------------------------------------- */
115 /* ---------- DFT LINEAR RESPONSE CONTRIBUTION EVALUATOR ------------- */
116 /* ------------------------------------------------------------------- */
117 void FSYM(deq27)(const real* cmo, const real* ubo, const real* dv,
118 real* dxcao, real* dxvao, real* wrk, integer* lfrsav);
119 void FSYM(autpv)(const integer*isym,const integer*jsym,
120 const real*u,const real* v,
121 real*prpao, const integer*nbas,const integer*nbast, real*prpmo,
122 const integer*norb, const integer*norbt,
123 real* wrk,integer* lwrk);
124
125 typedef struct {
126 real* dmat, *kappa, *res;
127 real* dtgao;
128 integer trplet, ksymop;
129 } LinRespData;
130
131
132 static __inline__ real
min(real a,real b)133 min(real a, real b)
134 { return a>b ? b : a; }
135 static __inline__ real
max(real a,real b)136 max(real a, real b)
137 { return a>b ? a : b; }
138
139 /* dft_lin_resp_slave:
140 this is a slave driver. It's task is to allocate memory needed by
141 the main property evaluator (dft_lin_resp in this case) and call
142 it. The computed values are collected in the evaluater using
143 dft_lin_resp_collect info and we can just drop the arrays on the
144 floor when we are done.
145 FIXME: we can use work for the temp data....
146 */
147 #if defined(VAR_MPI)
148
149 extern void FSYM(dftlrsync)(void);
150 static void
dft_lin_resp_sync_slaves(real * cmo,integer * nvec,real ** zymat,integer * trplet,integer * ksymop,real ** work,integer lwork)151 dft_lin_resp_sync_slaves(real* cmo, integer *nvec, real **zymat,
152 integer* trplet, integer* ksymop,
153 real **work, integer lwork)
154 {
155 static SyncData data2[] =
156 { {NULL, 0, MPI_DOUBLE}, {NULL, 0, MPI_DOUBLE}, {NULL, 0, fortran_MPI_INT},
157 {NULL, 0, fortran_MPI_INT} };
158 MPI_Bcast(nvec, 1, fortran_MPI_INT, MASTER_NO, MPI_COMM_WORLD);
159 FSYM(dftlrsync)();
160 if(*zymat == NULL) { /* we are a slave */
161 if(lwork<*nvec*inforb_.n2orbx)
162 dalton_quit("%s: slave needs %d words for ZYMAT, available %d",
163 __FUNCTION__, *nvec*inforb_.n2orbx, lwork);
164 *zymat = *work;
165 *work += *nvec*inforb_.n2orbx;
166 }
167 data2[0].data = cmo; data2[0].count = inforb_.ncmot;
168 data2[1].data = *zymat; data2[1].count = *nvec*inforb_.n2orbx;
169 data2[2].data = trplet; data2[2].count = 1;
170 data2[3].data = ksymop; data2[3].count = 1;
171 mpi_sync_data(data2, ELEMENTS(data2));
172 }
173
174 static __inline__ void
dft_lin_resp_collect_info(integer nvec,real * fmat,real * work,integer lwork)175 dft_lin_resp_collect_info(integer nvec, real* fmat, real*work, integer lwork)
176 {
177 int sz_mpi = 0;
178 MPI_Comm_size(MPI_COMM_WORLD, &sz_mpi);
179 if(sz_mpi<=1) return;
180
181 integer sz = nvec*inforb_.n2basx;
182 sz_mpi = sz;
183 if(sz>lwork) {
184 CHECK_WRKMEM(inforb_.n2basx,lwork);
185 for(sz=0; sz<nvec; sz++) {
186 dcopy_(&inforb_.n2basx, fmat + sz*inforb_.n2basx,
187 &ONEI, work, &ONEI);
188 integer len_for_reduce = inforb_.n2basx;
189 MPI_Reduce(work, fmat+sz*inforb_.n2basx, len_for_reduce,
190 MPI_DOUBLE, MPI_SUM, MASTER_NO, MPI_COMM_WORLD);
191 }
192 } else {
193 CHECK_WRKMEM(sz, lwork);
194 dcopy_(&sz, fmat, &ONEI, work, &ONEI);
195 MPI_Reduce(work, fmat, sz_mpi, MPI_DOUBLE, MPI_SUM,
196 MASTER_NO, MPI_COMM_WORLD);
197 }
198 }
199
200 #else /* VAR_MPI */
201 #define dft_lin_resp_sync_slaves(cmo,nvec,zymat,trplet,ksymop,work,lwork)
202 #define dft_lin_resp_collect_info(nvec,fmat,work,lwork)
203 #endif /* VAR_MPI */
204
205 static void
lin_resp_cb(DftGrid * grid,LinRespData * data)206 lin_resp_cb(DftGrid* grid, LinRespData* data)
207 {
208 real b0, b3[3];
209 integer isym, i, j;
210 SecondDrv vxc;
211
212 dgemv_("N",&inforb_.nbast,&inforb_.nbast,&ONER,
213 data->kappa,&inforb_.nbast,grid->atv,&ONEI,&ZEROR,
214 data->dtgao,&ONEI);
215 b0 = ddot_(&inforb_.nbast,data->dtgao,&ONEI,grid->atv,&ONEI);
216 if(grid->dogga) {
217 real brg, brz, facr, facg, bmax, vt[4];
218 real *atvX = &grid->atv[inforb_.nbast ];
219 real *atvY = &grid->atv[inforb_.nbast*2];
220 real *atvZ = &grid->atv[inforb_.nbast*3];
221 real ngrad = (grid->dp.grada + grid->dp.gradb);
222 /* B3 = GAO1'*DTGAO; */
223 dgemv_("T",&inforb_.nbast,&THREEI,&ONER,atvX, &inforb_.nbast,
224 data->dtgao,&ONEI, &ZEROR, b3, &ONEI);
225 /* DTGAO= DTRMAT'*GAO */
226 dgemv_("T",&inforb_.nbast, &inforb_.nbast, &ONER, data->kappa,
227 &inforb_.nbast,grid->atv,&ONEI,&ZEROR,data->dtgao,&ONEI);
228 /* B3 = B3 + GAO1'*DTGAO */
229 dgemv_("T",&inforb_.nbast,&THREEI,&ONER,atvX,
230 &inforb_.nbast,data->dtgao,&ONEI,&ONER,b3,&ONEI);
231 bmax = max(fabs(b0),max(fabs(b3[0]),max(fabs(b3[1]),fabs(b3[2]))));
232 if(bmax<=grid->dfthri) return;
233 brg = (b3[0]*grid->grada[0] +
234 b3[1]*grid->grada[1] +
235 b3[2]*grid->grada[2])*2;
236 brz = brg/ngrad;
237
238 dftpot1_(&vxc, &grid->curr_weight, &grid->dp, &data->trplet);
239 facr = vxc.fRZ*b0 + (vxc.fZZ-vxc.fZ/ngrad)*brz + vxc.fZG*brg;
240 facr = 2*(facr/ngrad + (vxc.fRG*b0+vxc.fZG*brz +vxc.fGG*brg));
241 facg = vxc.fZ/ngrad + vxc.fG;
242 vt[0] = vxc.fRR*b0 + vxc.fRZ*brz+ vxc.fRG*brg;
243 vt[1] = facr*grid->grada[0] + facg*b3[0];
244 vt[2] = facr*grid->grada[1] + facg*b3[1];
245 vt[3] = facr*grid->grada[2] + facg*b3[2];
246 for(isym=0; isym<inforb_.nsym; isym++) {
247 integer istr = inforb_.ibas[isym];
248 integer iend = inforb_.ibas[isym] + inforb_.nbas[isym];
249 integer jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
250 if (isym>=jsym) {
251 integer jstr = inforb_.ibas[jsym];
252 integer jend = inforb_.ibas[jsym] + inforb_.nbas[jsym];
253 for(i=istr; i<iend; i++) {
254 real g0 = grid->atv[i];
255 real gx = atvX[i];
256 real gy = atvY[i];
257 real gz = atvZ[i];
258 integer ioff = i*inforb_.nbast;
259 integer jtop = min(jend,i+1);
260 for(j=jstr; j<jtop; j++) {
261 real a0 = g0*grid->atv[j];
262 real ax = gx*grid->atv[j] + g0*atvX[j];
263 real ay = gy*grid->atv[j] + g0*atvY[j];
264 real az = gz*grid->atv[j] + g0*atvZ[j];
265 data->res[j+ioff] +=
266 vt[0]*a0 + vt[1]*ax +
267 vt[2]*ay + vt[3]*az;
268 }
269 }
270 }
271 }
272 } else { /* dogga == FALSE */
273 real vt;
274 if(fabs(b0)<=grid->dfthri) return;
275 dftpot1_(&vxc, &grid->curr_weight, &grid->dp, &data->trplet);
276 vt = vxc.fRR*b0;
277 for(isym=0; isym<inforb_.nsym; isym++) {
278 integer jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
279 if(isym>=jsym) {
280 integer istr = inforb_.ibas[isym];
281 integer iend = inforb_.ibas[isym] + inforb_.nbas[isym];
282 integer jstr = inforb_.ibas[jsym];
283 integer jend = inforb_.ibas[jsym] + inforb_.nbas[jsym]-1;
284 for(i=istr; i<iend; i++) {
285 integer ioff = i*inforb_.nbast;
286 real gvi = vt*grid->atv[i];
287 for(j=min(i,jend); j>=jstr; j--)
288 data->res[j+ioff] += gvi*grid->atv[j];
289 }
290 }
291 }
292 }
293 }
294
295 /* dft_lin_resp_:
296 Main Linear Response evaluator.
297 FMAT(NORBT,NORBT) - result added to FMAT.
298 cmo -const
299 zymat(NORBT,NORBT) - const response vector.
300 trplet - triplet excitation? (bool)
301 */
302 void
FSYM2(dft_lin_resp)303 FSYM2(dft_lin_resp)(real* fmat, real *cmo, real *zymat, integer *trplet,
304 integer *ksymop, real* work, integer* lwork, integer* iprint)
305 {
306 static const real DP5R = 0.5;
307 struct tms starttm, endtm; clock_t utm;
308 real electrons;
309 LinRespData lr_data; /* linear response data */
310 DftCallbackData cbdata[1];
311 DftDensity dens = { dft_dens_restricted, NULL, NULL };
312 real dummy;
313 integer nvec1=1;
314 integer i, j;
315
316 /* WARNING: NO work MAY BE done before syncing slaves! */
317 dft_wake_slaves((DFTPropEvalMaster)dft_lin_resp_); /* NO-OP in serial */
318 dft_lin_resp_sync_slaves(cmo,&nvec1,&zymat,trplet,ksymop,
319 &work, *lwork); /* NO-OP in serial */
320
321 times(&starttm);
322 lr_data.dmat = malloc(inforb_.n2basx*sizeof(real));
323 lr_data.res = calloc(inforb_.n2basx,sizeof(real));
324 lr_data.kappa = calloc(inforb_.n2basx,sizeof(real));
325 lr_data.dtgao = malloc(inforb_.nbast *sizeof(real));
326 lr_data.trplet= *trplet;
327 lr_data.ksymop = *ksymop;
328 dens.dmata = lr_data.dmat;
329
330 FSYM2(dft_get_ao_dens_mat)(cmo, lr_data.dmat, work, lwork);
331 FSYM(deq27)(cmo,zymat,&dummy,lr_data.kappa,&dummy,work,lwork);
332 dscal_(&inforb_.n2basx,&DP5R,lr_data.kappa,&ONEI);
333 if(DFTLR_DEBUG) {
334 fort_print("kappa matrix in dft_lin_resp");
335 outmat_(lr_data.kappa,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
336 &inforb_.nbast, &inforb_.nbast);
337 }
338
339 cbdata[0].callback = (DftCallback)lin_resp_cb;
340 cbdata[0].cb_data = &lr_data;
341 electrons = dft_integrate_ao(&dens, work, lwork, iprint, 0, 0, 0,
342 cbdata, ELEMENTS(cbdata));
343
344 if(DFTLR_DEBUG) {
345 fort_print("AO Fock matrix contribution in dft_lin_resp");
346 outmat_(lr_data.res,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
347 &inforb_.nbast, &inforb_.nbast);
348 }
349
350 for(i=0; i<inforb_.nbast; i++) {
351 integer ioff = i*inforb_.nbast;
352 for(j=0; j<i; j++) {
353 integer joff = j*inforb_.nbast;
354 real averag = lr_data.res[i+joff] + lr_data.res[j+ioff];
355 lr_data.res[i+joff] = lr_data.res[j+ioff] = averag;
356 }
357 }
358
359 if(DFTLR_DEBUG) {
360 fort_print("MO Fock matrix contribution in dft_lin_resp");
361 outmat_(work,&ONEI,&inforb_.norbt,&ONEI,&inforb_.norbt,
362 &inforb_.norbt, &inforb_.norbt);
363 }
364
365 /* transform to MO Fock matrix contribution */
366 FSYM(lrao2mo)(cmo, ksymop, lr_data.res, fmat, work, lwork);
367
368 /* FIXME: parallelization here! */
369 free(lr_data.dmat);
370 free(lr_data.kappa);
371 free(lr_data.res);
372 free(lr_data.dtgao);
373 if (*iprint>0) {
374 times(&endtm);
375 utm = endtm.tms_utime-starttm.tms_utime;
376 fort_print(" Electrons: %f(%9.1g): LR-DFT eval. time: %9.1f s",
377 electrons, (electrons-2.0*inforb_.nrhft)/(2.0*inforb_.nrhft),
378 utm/(double)sysconf(_SC_CLK_TCK));
379 }
380 }
381
382 /* ------------------------------------------------------------------- */
383 /* ---------- DFT LONDON ORBITAL TRANSFORMATION EVALUATOR ------------ */
384 /* ------------------------------------------------------------------- */
385 void
386 FSYM(dftmag)(real* excmat,const real* x,const real* y,const real* z,
387 const real* gao, const real* gao1,const real* gab1,
388 const real* gab2,const real* vxc, const real* vxb,
389 const real* rh, const integer* dogga,const integer* fromvx);
390
391 static void
london_cb(DftGrid * grid,real * d)392 london_cb(DftGrid* grid, real* d)
393 {
394 FirstDrv drvs;
395 integer lnd_off = grid->london_off*inforb_.nbast;
396
397 dftpot0_(&drvs, &grid->curr_weight, &grid->dp);
398 if(grid->dogga) drvs.fZ *= 1.0/grid->dp.grada;
399 /*
400 fort_print("DFTMAG: (%9.4f,%9.4f,%9.4f): RHO=%g %g %g",
401 grid->corx[grid->curr_point],
402 grid->cory[grid->curr_point],
403 grid->corz[grid->curr_point],
404 grid->rho,
405 drvs.df1000, drvs.df0010);
406 */
407 dftmag_(d,&grid->coor[grid->curr_point][0],
408 &grid->coor[grid->curr_point][1],
409 &grid->coor[grid->curr_point][2],
410 grid->atv, &grid->atv[inforb_.nbast],
411 &grid->atv[lnd_off],&grid->atv[lnd_off+inforb_.nbast],
412 &drvs.fR, &drvs.fZ, grid->grada, &grid->dogga,
413 &ZEROI);
414 }
415
416 void
417 FSYM(stopit)(const char* sub,const char* place,
418 const integer* int1, const integer* int2,
419 integer sub_len, integer place_len);
420
421 void
FSYM2(dft_london)422 FSYM2(dft_london)(real* fx, real* fy, real* fz, real* work,
423 integer* lwork, integer* iprint)
424 {
425 DftCallbackData cbdata[1];
426 DftDensity dens = { dft_dens_restricted, NULL, NULL };
427 struct tms starttm, endtm; clock_t utm;
428 real electrons;
429 integer i, j;
430 real* new_work, *xcmat, *dmat;
431 integer new_lwork, xc_sz=3*inforb_.n2basx;
432
433 xcmat = work;
434 dmat = work + 3*inforb_.n2basx;
435 new_work = work + 4*inforb_.n2basx;
436 new_lwork = *lwork-4*inforb_.n2basx;
437 if (new_lwork<0) stopit_("DFTLND"," ", lwork, lwork, 6, 1);
438 cbdata[0].callback = (DftCallback)london_cb;
439 cbdata[0].cb_data = xcmat;
440 dens.dmata = dmat;
441
442 times(&starttm);
443 FSYM(dftdns)(dmat, work, &new_lwork, iprint);
444 dzero_(xcmat, &xc_sz);
445 electrons = dft_integrate_ao(&dens, new_work, &new_lwork, iprint, 0, 0, 1,
446 cbdata, ELEMENTS(cbdata));
447 /* dft_mol_grad_collect_info(work); NO-OP in serial */
448 if (*iprint>0) {
449 times(&endtm);
450 utm = endtm.tms_utime-starttm.tms_utime;
451 fort_print(" Electrons: %f (%9.1g): LONDON evaluation time: %10.2f s",
452 electrons, (double)(electrons-(integer)(electrons+0.5)),
453 (double)(utm/(double)sysconf(_SC_CLK_TCK)));
454 }
455
456 /* post-transformation: dftdrv stage */
457 /*output_(xcmat,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
458 &inforb_.nbast, &inforb_.nbast, &ONEI, &priunt_.lupri); */
459
460 for(i=0; i<inforb_.nbast; i++)
461 for(j=0; j<i; j++) {
462 real averag = 0.5*(xcmat[i+j*inforb_.nbast]-
463 xcmat[j+i*inforb_.nbast]);
464 xcmat[i+j*inforb_.nbast] = averag;
465 xcmat[j+i*inforb_.nbast] = -averag;
466 }
467
468 if(DFTMAG_DEBUG) {
469 fort_print("DFT LDN contribution (x direction)");
470 outmat_(xcmat,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
471 &inforb_.nbast, &inforb_.nbast);
472 fort_print("DFT LDN contribution (y direction)");
473 outmat_(xcmat+inforb_.n2basx,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
474 &inforb_.nbast, &inforb_.nbast);
475 fort_print("DFT LDN contribution (z direction)");
476 outmat_(xcmat+2*inforb_.n2basx,&ONEI,&inforb_.nbast,&ONEI,
477 &inforb_.nbast, &inforb_.nbast, &inforb_.nbast);
478 }
479
480 /* post-transformation: dftlnd stage */
481 daxpy_(&inforb_.n2basx, &ONER,xcmat, &ONEI,fx,&ONEI);
482 daxpy_(&inforb_.n2basx, &ONER,xcmat+inforb_.n2basx, &ONEI,fy,&ONEI);
483 daxpy_(&inforb_.n2basx, &ONER,xcmat+inforb_.n2basx*2,&ONEI,fz,&ONEI);
484 if(DFTMAG_DEBUG) {
485 fort_print("FX (x direction)");
486 outmat_(fx,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
487 &inforb_.nbast, &inforb_.nbast);
488 fort_print("FY (y direction)");
489 outmat_(fy,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
490 &inforb_.nbast, &inforb_.nbast);
491 fort_print("FZ (z direction)");
492 outmat_(fz,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
493 &inforb_.nbast, &inforb_.nbast);
494 }
495
496 }
497
498
499 /* ================================================================== */
500 /* Open shell versions of the dft property evaluators */
501 /* ================================================================== */
502 /* ZR alpha/beta version */
503 typedef struct {
504 real *ksma, *ksmb;
505 real energy;
506 } DftKohnShamU;
507
508 #if defined(VAR_MPI)
509 /* dft_kohn_sham_ab_slave:
510 this is a slave driver. It's task is to allocate memory needed by
511 the main property evaluator (dft_kohn_shamab in this case) and call it.
512 */
513 void
dft_kohn_shamab_slave(real * work,integer * lwork,integer * iprint)514 dft_kohn_shamab_slave(real* work, integer* lwork, integer* iprint)
515 {
516 real* dmat = malloc(2*inforb_.n2basx*sizeof(real));
517 real* ksm = calloc(2*inforb_.n2basx,sizeof(real));
518 real edfty;
519 FSYM2(dft_kohn_shamab)(dmat, ksm, &edfty, work, lwork, iprint);
520 free(dmat);
521 free(ksm);
522 }
523
524 static __inline__ void
dft_kohn_shamab_sync_slaves(real * dmat)525 dft_kohn_shamab_sync_slaves(real* dmat)
526 {
527 MPI_Bcast(dmat, 2*inforb_.n2basx,MPI_DOUBLE,
528 MASTER_NO, MPI_COMM_WORLD);
529 }
530
531 static __inline__ void
dft_kohn_shamab_collect_info(real * ksm,real * energy,real * work,integer lwork)532 dft_kohn_shamab_collect_info(real*ksm, real* energy, real* work, integer lwork)
533 {
534 real tmp = *energy;
535 int sz = 0;
536 MPI_Comm_size(MPI_COMM_WORLD, &sz);
537 if(sz<=1) return;
538
539 sz = 2*inforb_.n2basx;
540 integer sz_ftn = sz;
541 CHECK_WRKMEM(sz,lwork);
542 dcopy_(&sz_ftn, ksm, &ONEI, work, &ONEI);
543 MPI_Reduce(work, ksm, sz, MPI_DOUBLE, MPI_SUM,
544 MASTER_NO, MPI_COMM_WORLD);
545 MPI_Reduce(&tmp, energy, 1, MPI_DOUBLE, MPI_SUM,
546 MASTER_NO, MPI_COMM_WORLD);
547 }
548
549 #else /* VAR_MPI */
550 #define dft_kohn_shamab_sync_slaves(dmat)
551 #define dft_kohn_shamab_collect_info(myksm, ksm, energy,lwork)
552 #endif /* VAR_MPI */
553
554 static void
kohn_shamab_cb(DftGrid * grid,DftKohnShamU * exc)555 kohn_shamab_cb(DftGrid* grid, DftKohnShamU* exc)
556 {
557 FunFirstFuncDrv drvs;
558 integer isym, i, j;
559
560 drv1_clear(&drvs);
561 exc->energy += selected_func->func(&grid->dp)*grid->curr_weight;
562 if(grid->dp.rhob<1e-20) grid->dp.rhob = 1e-20;
563 if(grid->dp.gradb<1e-20) grid->dp.gradb = 1e-20;
564 if(grid->dp.gradab<1e-20) /* so small that we can ignore direction */
565 grid->dp.gradab = 1e-20;
566 selected_func->first(&drvs, grid->curr_weight, &grid->dp);
567 if(isnan(drvs.df1000) || isnan(drvs.df0100) || isinf(drvs.df0100) ||
568 isnan(drvs.df0010) || isnan(drvs.df0001) ||
569 isnan(drvs.df00001)) {
570 fort_print("AAA: %g %g %g %g %g => %g %g %g %g %g",
571 grid->dp.rhoa, grid->dp.grada,
572 grid->dp.rhob, grid->dp.gradb, grid->dp.gradab,
573 drvs.df1000, drvs.df0100, drvs.df0010, drvs.df0001,
574 drvs.df00001);
575 }
576 if(grid->dogga) {
577 real *atvX = &grid->atv[inforb_.nbast];
578 real *atvY = &grid->atv[inforb_.nbast*2];
579 real *atvZ = &grid->atv[inforb_.nbast*3];
580 real fxa, fya, fza, fxb, fyb, fzb;
581 /* add epsilon to avoid division by zero */
582 real grada = fabs(grid->dp.grada)>1e-40 ? grid->dp.grada : 1e-40;
583 real gradb = fabs(grid->dp.gradb)>1e-40 ? grid->dp.gradb : 1e-40;
584 /* alpha */
585 drvs.df0010 *= 2.0/grada;
586 fxa = drvs.df0010*grid->grada[0]+2.0*drvs.df00001*grid->gradb[0];
587 fya = drvs.df0010*grid->grada[1]+2.0*drvs.df00001*grid->gradb[1];
588 fza = drvs.df0010*grid->grada[2]+2.0*drvs.df00001*grid->gradb[2];
589 /*beta */
590 drvs.df0001 *= 2.0/gradb;
591 fxb = drvs.df0001*grid->gradb[0]+2.0*drvs.df00001*grid->grada[0];
592 fyb = drvs.df0001*grid->gradb[1]+2.0*drvs.df00001*grid->grada[1];
593 fzb = drvs.df0001*grid->gradb[2]+2.0*drvs.df00001*grid->grada[2];
594
595 for(isym=0; isym<inforb_.nsym; isym++) {
596 integer istr = inforb_.ibas[isym];
597 integer iend = inforb_.ibas[isym]+inforb_.nbas[isym];
598 for(j=istr; j<iend; j++) {
599 real fca = drvs.df1000*grid->atv[j]
600 +fxa*atvX[j] + fya*atvY[j] + fza*atvZ[j];
601 real fcb = drvs.df0100*grid->atv[j]
602 +fxb*atvX[j] + fyb*atvY[j] + fzb*atvZ[j];
603 integer joff = j*inforb_.nbast;
604 for(i=istr; i<iend; i++)
605 exc->ksma[i+joff] += fca*grid->atv[i];
606 for(i=istr; i<iend; i++)
607 exc->ksmb[i+joff] += fcb*grid->atv[i];
608 }
609 }
610 } else {
611 for(isym=0; isym<inforb_.nsym; isym++) {
612 integer istr = inforb_.ibas[isym];
613 integer iend = inforb_.ibas[isym]+inforb_.nbas[isym];
614 for(j=istr; j<iend; j++) {
615 real gvxca = 2.0*drvs.df1000*grid->atv[j];
616 real gvxcb = 2.0*drvs.df0100*grid->atv[j];
617 integer joff = j*inforb_.nbast;
618 for(i=istr; i<j; i++)
619 exc->ksma[i+joff] += gvxca*grid->atv[i];
620 exc->ksma[j+joff] += 0.5*gvxca*grid->atv[j];
621 for(i=istr; i<j; i++)
622 exc->ksmb[i+joff] += gvxcb*grid->atv[i];
623 exc->ksmb[j+joff] += 0.5*gvxcb*grid->atv[j];
624 }
625 }
626 }
627 }
628
629
630 /* ZR alpha/beta version:
631 * we require that dmata and dmatb are aligned consecutively in memory
632 * in order to reduce the MPI latency penalty.
633 */
634 void
FSYM2(dft_kohn_shamab)635 FSYM2(dft_kohn_shamab)(real* dmat, real* ksm, real *edfty,
636 real* work, integer *lwork, integer* iprint)
637 {
638 integer nbast2, i, j;
639 DftCallbackData cbdata[1];
640 DftKohnShamU res; /* res like result */
641 DftDensity dens = { dft_dens_unrestricted, NULL, NULL };
642 struct tms starttm, endtm; clock_t utm;
643 real electrons, exp_el = 2.0*inforb_.nrhft+inforb_.nasht;
644 integer sz;
645
646 /* WARNING: NO work MAY BE done before syncing slaves! */
647 dft_wake_slaves((DFTPropEvalMaster)FSYM2(dft_kohn_shamab));/* NO-OP in serial */
648 dft_kohn_shamab_sync_slaves(dmat); /* NO-OP in serial */
649
650 dens.dmata = dmat; dens.dmatb = dmat + inforb_.n2basx;
651 nbast2 = inforb_.nbast*inforb_.nbast;
652 res.ksma = calloc(2*nbast2, sizeof(real));
653 res.ksmb = res.ksma + nbast2;
654 cbdata[0].callback = (DftCallback)kohn_shamab_cb;
655 cbdata[0].cb_data = &res;
656
657 times(&starttm);
658 res.energy = 0.0;
659 electrons = dft_integrate_ao(&dens, work, lwork, iprint, 0, 0, 0,
660 cbdata, ELEMENTS(cbdata));
661 dft_kohn_shamab_collect_info(res.ksma, &res.energy, work, *lwork);
662
663 for(i=0; i<inforb_.nbast; i++) {
664 integer ioff = i*inforb_.nbast;
665 for(j=0; j<i; j++) {
666 integer joff = j*inforb_.nbast;
667 real averag = 0.5*(res.ksma[i+joff] + res.ksma[j+ioff]);
668 res.ksma[i+joff] = res.ksma[j+ioff] = averag;
669 averag = 0.5*(res.ksmb[i+joff] + res.ksmb[j+ioff]);
670 res.ksmb[i+joff] = res.ksmb[j+ioff] = averag;
671 }
672 }
673 *edfty=res.energy;
674 sz = 2*inforb_.n2basx;
675 daxpy_(&sz, &ONER, res.ksma, &ONEI, ksm, &ONEI);
676
677 free(res.ksma);
678 if (*iprint>0) {
679 times(&endtm);
680 utm = endtm.tms_utime-starttm.tms_utime;
681 fort_print(" Electrons: %11.7f %9.1g: Energy %f KS time: %9.1f s",
682 electrons, (electrons-exp_el)/exp_el,
683 res.energy, utm/(double)sysconf(_SC_CLK_TCK));
684 }
685 }
686
687 /* =================================================================== */
688 typedef struct {
689 real* dmata, *dmatb, *kappaa, *kappab, *resa, *resb;
690 real* dtgaoa, *dtgaob;
691 integer trplet, ksymop;
692 } LinRespDataab;
693
694
695 #if defined(VAR_MPI)
696 void
dft_lin_respab_slave(real * work,integer * lwork,integer * iprint)697 dft_lin_respab_slave(real* work, integer* lwork, integer* iprint)
698 {
699 real *fmat = calloc(2*inforb_.n2orbx,sizeof(real)); /* OUT */
700 real *cmo = malloc(inforb_.norbt*inforb_.nbast*sizeof(real)); /* IN */
701 real *zymat= malloc(2*inforb_.n2orbx*sizeof(real)); /* IN */
702 integer trplet; /* IN: will be synced from master */
703 integer ksymop; /* IN: will be synced from master */
704 FSYM2(dft_lin_respab)(fmat, fmat+inforb_.n2orbx, cmo, zymat, &trplet,
705 &ksymop, work, lwork, iprint);
706 free(fmat);
707 free(cmo);
708 free(zymat);
709 }
710
711 static __inline__ void
dft_lin_respab_collect_info(real * fmat,real * work,integer lwork)712 dft_lin_respab_collect_info(real* fmat, real*work, integer lwork)
713 {
714 int sz = 0;
715 MPI_Comm_size(MPI_COMM_WORLD, &sz);
716 if(sz<=1) return;
717
718 sz = 2*inforb_.n2orbx;
719 integer sz_ftn =sz;
720 CHECK_WRKMEM(sz,lwork);
721 dcopy_(&sz_ftn, fmat, &ONEI, work, &ONEI);
722 MPI_Reduce(work, fmat, sz, MPI_DOUBLE, MPI_SUM,
723 MASTER_NO, MPI_COMM_WORLD);
724 }
725
726 #else /* VAR_MPI */
727 #define dft_lin_respab_collect_info(fmat,work, lwork)
728 #endif /* VAR_MPI */
729
730
731 static void
compute_trans_rho(DftGrid * grid,LinRespDataab * data,real * rhowa,real * rhowb)732 compute_trans_rho(DftGrid* grid, LinRespDataab* data, real* rhowa, real* rhowb)
733 {
734 dgemv_("N",&inforb_.nbast,&inforb_.nbast,&ONER,
735 data->kappaa,&inforb_.nbast,grid->atv,&ONEI,&ZEROR,
736 data->dtgaoa,&ONEI);
737 *rhowa = ddot_(&inforb_.nbast,data->dtgaoa,&ONEI,grid->atv,&ONEI);
738 dgemv_("N",&inforb_.nbast,&inforb_.nbast,&ONER,
739 data->kappab,&inforb_.nbast,grid->atv,&ONEI,&ZEROR,
740 data->dtgaob,&ONEI);
741 *rhowb = ddot_(&inforb_.nbast,data->dtgaob,&ONEI,grid->atv,&ONEI);
742 }
743
744
745 /* lin_resp_cbab:
746 * ZR alpa/beta version of the linear response evaluation routine.
747 * triplet: hypotetical case of triplet excitations
748 * should work for general case.... */
749 static void
lin_resp_cbab_gga(DftGrid * grid,LinRespDataab * data)750 lin_resp_cbab_gga(DftGrid* grid, LinRespDataab* data)
751 {
752 /* real b0a, b0b, b3a[3],b3b[3],gradab; */
753 real rhowa,rhowb, gradwa[3], gradwb[3];
754 integer isym, i, j;
755 integer jsym, istr, iend, jstr, jend, ioff;
756 FunSecondFuncDrv vxc;
757 real zetaa, zetab, zetac;
758 real znva, rxa, rya, rza;
759 real znvb, rxb, ryb, rzb;
760 real fac0, facr, facz;
761 real ar, ab, arb, abw;
762 real g0, gx, gy, gz;
763 real a0, ax, ay, az;
764 real *atvX = &grid->atv[inforb_.nbast];
765 real *atvY = &grid->atv[inforb_.nbast*2];
766 real *atvZ = &grid->atv[inforb_.nbast*3];
767
768 compute_trans_rho(grid, data, &rhowa, &rhowb);
769 if (data->trplet) rhowb = -rhowb;
770 /* gradwa evaluation */
771 dgemv_("T",&inforb_.nbast,&THREEI,&ONER,atvX, &inforb_.nbast,
772 data->dtgaoa,&ONEI, &ZEROR, gradwa, &ONEI);
773 dgemv_("T",&inforb_.nbast, &inforb_.nbast, &ONER, data->kappaa,
774 &inforb_.nbast,grid->atv,&ONEI,&ZEROR,data->dtgaoa,&ONEI);
775 dgemv_("T",&inforb_.nbast,&THREEI,&ONER,atvX,
776 &inforb_.nbast,data->dtgaoa,&ONEI,&ONER,gradwa,&ONEI);
777 /* gradwb evaluation */
778 dgemv_("T",&inforb_.nbast,&THREEI,&ONER,atvX, &inforb_.nbast,
779 data->dtgaob,&ONEI, &ZEROR, gradwb, &ONEI);
780 dgemv_("T",&inforb_.nbast, &inforb_.nbast, &ONER, data->kappab,
781 &inforb_.nbast,grid->atv,&ONEI,&ZEROR,data->dtgaob,&ONEI);
782 dgemv_("T",&inforb_.nbast,&THREEI,&ONER,atvX,
783 &inforb_.nbast,data->dtgaob,&ONEI,&ONER,gradwb,&ONEI);
784 if (data->trplet) {
785 gradwb[0]= -gradwb[0];
786 gradwb[1]= -gradwb[1];
787 gradwb[2]= -gradwb[2];
788 }
789 /* second derivatives calculation */
790 drv2_clear(&vxc);
791 selected_func->second(&vxc, grid->curr_weight, &grid->dp);
792 /* alpha coeficients */
793 /* add epsilon to avoid division by zero */
794 znva = 1.0/(fabs(grid->dp.grada)>1e-40 ? grid->dp.grada : 1e-40);
795 vxc.df0010 = znva*vxc.df0010;
796 rxa = znva*grid->grada[0];
797 rya = znva*grid->grada[1];
798 rza = znva*grid->grada[2];
799 /* beta coeficients */
800 znvb = 1.0/(fabs(grid->dp.gradb)>1e-40 ? grid->dp.gradb : 1e-40);
801 vxc.df0001 = znvb*vxc.df0001;
802 rxb = znvb*grid->gradb[0];
803 ryb = znvb*grid->gradb[1];
804 rzb = znvb*grid->gradb[2];
805 /* variations of functionals variables */
806 zetaa = gradwa[0]*rxa + gradwa[1]*rya + gradwa[2]*rza;
807 zetab = gradwb[0]*rxb + gradwb[1]*ryb + gradwb[2]*rzb;
808 zetac = gradwa[0]*grid->gradb[0]+gradwa[1]*grid->gradb[1] +
809 gradwa[2]*grid->gradb[2]+gradwb[0]*grid->grada[0] +
810 gradwb[1]*grid->grada[1]+gradwb[2]*grid->grada[2];
811
812 /* Derivatives can go in principle to infinity if evaluated at
813 * inapriopriate points. Take precautions. */
814 fac0 = vxc.df1010*zetaa+vxc.df1001*zetab +vxc.df10001*zetac;
815 if(fabs(rhowa)>0) fac0 += vxc.df2000*rhowa;
816 if(fabs(rhowa)>0) fac0 += vxc.df1100*rhowb;
817 facr = vxc.df1010*rhowa+vxc.df0110*rhowb+vxc.df0020*zetaa
818 +vxc.df0011*zetab
819 +vxc.df00101*zetac;
820 /* note: the mixed zetac derivatives not included in facr */
821 facz = vxc.df10001*rhowa+vxc.df01001*rhowb
822 + vxc.df00101*zetaa+vxc.df00011*zetab+vxc.df00002*zetac;
823 /* note: the mixed zetac derivatives not included in facz */
824 for(isym=0; isym<inforb_.nsym; isym++) {
825 istr = inforb_.ibas[isym];
826 iend = inforb_.ibas[isym] + inforb_.nbas[isym];
827 jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
828 if (isym>=jsym) {
829 jstr = inforb_.ibas[jsym];
830 jend = inforb_.ibas[jsym] + inforb_.nbas[jsym];
831 for(i=istr; i<iend; i++) {
832 g0 = grid->atv[i];
833 gx = atvX[i];
834 gy = atvY[i];
835 gz = atvZ[i];
836 ioff = i*inforb_.nbast;
837 for(j=min(i,jend); j>=jstr; j--) {
838 a0 = g0*grid->atv[j];
839 ax = gx*grid->atv[j] + g0*atvX[j];
840 ay = gy*grid->atv[j] + g0*atvY[j];
841 az = gz*grid->atv[j] + g0*atvZ[j];
842 ar = ax*rxa + ay*rya + az*rza;
843 ab = ax*gradwa[0]+ay*gradwa[1]+az*gradwa[2]-ar*zetaa;
844 arb = ax*grid->gradb[0]+ay*grid->gradb[1]+az*grid->gradb[2];
845 abw = ax*gradwb[0]+ay*gradwb[1]+az*gradwb[2];
846 /* triplet:
847 abw = -(ax*gradwb[0]+ay*gradwb[1]+az*gradwb[2]);
848 */
849 data->resa[j+ioff] += 0.5*(fac0*a0+facr*ar+arb*facz+
850 vxc.df0010*ab+abw*vxc.df00001);
851 }
852 }
853 }
854 }
855 /* beta Fock contribution evaluation */
856 fac0 = vxc.df0101*zetab+vxc.df0110*zetaa +vxc.df01001*zetac;
857 if(fabs(rhowb)>0) fac0 += vxc.df0200*rhowb;
858 if(fabs(rhowa)>0) fac0 += vxc.df1100*rhowa;
859 facr = vxc.df0101*rhowb+vxc.df1001*rhowa
860 +vxc.df0002*zetab+vxc.df0011*zetaa+vxc.df00011*zetac;
861 /* note: the mixed zetac derivatives not included in facr */
862 facz = vxc.df10001*rhowa+vxc.df01001*rhowb
863 + vxc.df00011*zetab+vxc.df00101*zetaa+vxc.df00002*zetac;
864 /* note: the mixed zetac derivatives not included in facr */
865 for(isym=0; isym<inforb_.nsym; isym++) {
866 istr = inforb_.ibas[isym];
867 iend = inforb_.ibas[isym] + inforb_.nbas[isym];
868 jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
869 if (isym>=jsym) {
870 jstr = inforb_.ibas[jsym];
871 jend = inforb_.ibas[jsym] + inforb_.nbas[jsym];
872 for(i=istr; i<iend; i++) {
873 g0 = grid->atv[i];
874 gx = atvX[i];
875 gy = atvY[i];
876 gz = atvZ[i];
877 ioff = i*inforb_.nbast;
878 for(j=min(i,jend); j>=jstr; j--) {
879 a0 = g0*grid->atv[j];
880 ax = gx*grid->atv[j] + g0*atvX[j];
881 ay = gy*grid->atv[j] + g0*atvY[j];
882 az = gz*grid->atv[j] + g0*atvZ[j];
883 ar = ax*rxb + ay*ryb + az*rzb;
884 ab = ax*gradwb[0]+ay*gradwb[1]+az*gradwb[2]
885 -ar*zetab;
886 arb = ax*grid->grada[0]+ay*grid->grada[1]
887 +az*grid->grada[2];
888 abw = ax*gradwa[0]+ay*gradwa[1]+az*gradwa[2];
889 data->resb[j+ioff] +=
890 0.5*(fac0*a0+facr*ar+arb*facz+
891 vxc.df0001*ab+abw*vxc.df00001);
892 }
893 }
894 }
895 }
896 }
897
898 /* lin_resp_cbab:
899 * ZR alpa/beta version of the linear response evaluation routine.
900 * triplet: hypotetical case of triplet excitations
901 * should work for general case.... */
902 static void
lin_resp_cbab_nogga(DftGrid * grid,LinRespDataab * data)903 lin_resp_cbab_nogga(DftGrid* grid, LinRespDataab* data)
904 {
905 /* real b0a, b0b, b3a[3],b3b[3],gradab; */
906 real rhowa,rhowb;
907 integer isym, i, j;
908 integer jsym, istr, iend, jstr, jend, ioff;
909 FunSecondFuncDrv vxc;
910 real vt, gvi;
911
912 compute_trans_rho(grid, data, &rhowa, &rhowb);
913 if (data->trplet) rhowb = -rhowb;
914 drv2_clear(&vxc);
915 selected_func->second(&vxc, grid->curr_weight, &grid->dp);
916 /* alpha Fock contribution */
917 /* derivatives can in principle diverge, be cautious! */
918 vt = 0;
919 if(fabs(rhowa)>0) vt += 0.5*vxc.df2000*rhowa;
920 if(fabs(rhowb)>0) vt += 0.5*vxc.df1100*rhowb;
921 if(fabs(vt)>1e-15) {
922 for(isym=0; isym<inforb_.nsym; isym++) {
923 jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
924 if(isym>=jsym) {
925 istr = inforb_.ibas[isym];
926 iend = inforb_.ibas[isym] + inforb_.nbas[isym];
927 jstr = inforb_.ibas[jsym];
928 jend = inforb_.ibas[jsym] + inforb_.nbas[jsym];
929 for(i=istr; i<iend; i++) {
930 ioff = i*inforb_.nbast;
931 gvi = vt*grid->atv[i];
932 for(j=min(i,jend-1); j>=jstr; j--)
933 data->resa[j+ioff] += gvi*grid->atv[j];
934 }
935 }
936 }
937 }
938 /* beta Fock contribution */
939 /* derivatives can in principle diverge, be cautious! */
940 vt = 0;
941 if(fabs(rhowb)>0) vt += 0.5*vxc.df0200*rhowb;
942 if(fabs(rhowa)>0) vt += 0.5*vxc.df1100*rhowa;
943 if(fabs(vt)>1e-15) {
944 for(isym=0; isym<inforb_.nsym; isym++) {
945 jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
946 if(isym>=jsym) {
947 istr = inforb_.ibas[isym];
948 iend = inforb_.ibas[isym] + inforb_.nbas[isym];
949 jstr = inforb_.ibas[jsym];
950 jend = inforb_.ibas[jsym] + inforb_.nbas[jsym];
951 for(i=istr; i<iend; i++) {
952 ioff = i*inforb_.nbast;
953 gvi = vt*grid->atv[i];
954 for(j=min(i,jend-1); j>=jstr; j--)
955 data->resb[j+ioff] += gvi*grid->atv[j];
956 }
957 }
958 }
959 }
960 }
961
962 /* ZR linear response for open shell system:
963 Evaluates DFT contributions:
964 - core fock matrix i.e. FCONE
965 - open shell fock matix i.e FVONE
966 */
967
968 void
FSYM2(dft_lin_respab)969 FSYM2(dft_lin_respab)(real* fmatc, real* fmato, real *cmo, real *zymat,
970 integer *trplet, integer *ksymop,
971 real* work, integer* lwork, integer* iprint)
972 {
973 static const real DP5R = 0.5;
974 static const real MONER = -1.0;
975 real electrons = 13.0;
976 struct tms starttm, endtm; clock_t utm;
977 LinRespDataab lr_data;
978 DftCallbackData cbdata[1];
979 integer i=1, j, ioff, joff, norbi, norbj;
980 integer nvec1=1;
981 real averag;
982 real *fmata, *fmatb;
983 real *runit;
984 DftDensity dens = { dft_dens_unrestricted, NULL, NULL };
985 integer isym, jsym;
986
987 /* WARNING: NO work MAY BE done before syncing slaves! */
988 dft_wake_slaves((DFTPropEvalMaster)FSYM2(dft_lin_respab)); /* NO-OP in serial */
989 dft_lin_resp_sync_slaves(cmo,&nvec1,&zymat,trplet, ksymop,
990 &work, *lwork); /* NO-OP in serial */
991
992 times(&starttm);
993 /* linear reponse data */
994 dens.dmata = lr_data.dmata = malloc(inforb_.n2basx*sizeof(real));
995 dens.dmatb = lr_data.dmatb = malloc(inforb_.n2basx*sizeof(real));
996 lr_data.resa = calloc(2*inforb_.n2basx,sizeof(real));
997 lr_data.resb = lr_data.resa + inforb_.n2basx; /* it's an alias only */
998 lr_data.kappaa = calloc(inforb_.n2basx,sizeof(real));
999 lr_data.kappab = calloc(inforb_.n2basx,sizeof(real));
1000 lr_data.dtgaoa = malloc(inforb_.nbast *sizeof(real));
1001 lr_data.dtgaob = malloc(inforb_.nbast *sizeof(real));
1002 lr_data.trplet = *trplet;
1003 lr_data.ksymop = *ksymop;
1004
1005 /* get alpha/beta densities and corresponding kappas */
1006
1007 FSYM2(dft_get_ao_dens_matab)(cmo,lr_data.dmatb,lr_data.dmata,work,lwork);
1008 daxpy_(&inforb_.n2basx,&DP5R,lr_data.dmatb,&ONEI,lr_data.dmata,&ONEI);
1009 dscal_(&inforb_.n2basx,&DP5R,lr_data.dmatb,&ONEI);
1010 runit=calloc(inforb_.n2ashx,sizeof(real));
1011 dunit_(runit,&inforb_.nasht);
1012 FSYM(deq27)(cmo,zymat,runit,lr_data.kappab,lr_data.kappaa,work,lwork);
1013 dscal_(&inforb_.n2basx,&DP5R,lr_data.kappab,&ONEI);
1014 free(runit);
1015 daxpy_(&inforb_.n2basx,&ONER,lr_data.kappab,&ONEI,lr_data.kappaa,&ONEI);
1016
1017 cbdata[0].callback =
1018 (DftCallback)(selected_func->is_gga() ?
1019 lin_resp_cbab_gga : lin_resp_cbab_nogga);
1020 cbdata[0].cb_data = &lr_data;
1021 electrons = dft_integrate_ao(&dens,work, lwork, iprint, 0, 0, 0,
1022 cbdata,ELEMENTS(cbdata));
1023
1024 dft_lin_respab_collect_info(lr_data.resa, work, *lwork);/*serial:NO-OP*/
1025
1026 for(i=0; i<inforb_.nbast; i++) {
1027 ioff = i*inforb_.nbast;
1028 for(j=0; j<i; j++) {
1029 joff = j*inforb_.nbast;
1030 averag = lr_data.resa[i+joff] + lr_data.resa[j+ioff];
1031 lr_data.resa[i+joff] = lr_data.resa[j+ioff] = averag;
1032 averag = lr_data.resb[i+joff] + lr_data.resb[j+ioff];
1033 lr_data.resb[i+joff] = lr_data.resb[j+ioff] = averag;
1034 }
1035 }
1036 /* fort_print("Ressa: ");
1037 output_(lr_data.resa,&ONEI,&inforb_.norbt,&ONEI,&inforb_.norbt,
1038 &inforb_.norbt, &inforb_.norbt, &ONEI, &priunt_.lupri);
1039 fort_print("Ressb");
1040 output_(lr_data.resb,&ONEI,&inforb_.norbt,&ONEI,&inforb_.norbt,
1041 &inforb_.norbt, &inforb_.norbt, &ONEI, &priunt_.lupri); */
1042
1043 fmata=calloc(inforb_.n2orbx,sizeof(real));
1044 fmatb=calloc(inforb_.n2orbx,sizeof(real));
1045 for(isym=1; isym<=inforb_.nsym; isym++) {
1046 jsym = inforb_.muld2h[lr_data.ksymop-1][isym-1];
1047 norbi = inforb_.norb[isym-1];
1048 norbj = inforb_.norb[jsym-1];
1049 if(norbi>0 && norbj>0)
1050 FSYM(autpv)(&isym,&jsym,&cmo[inforb_.icmo[isym-1]],
1051 &cmo[inforb_.icmo[jsym-1]], lr_data.resa,
1052 inforb_.nbas,&inforb_.nbast,fmata,
1053 inforb_.norb,&inforb_.norbt,
1054 work, lwork);
1055 }
1056 for(isym=1; isym<=inforb_.nsym; isym++) {
1057 jsym = inforb_.muld2h[lr_data.ksymop-1][isym-1];
1058 norbi = inforb_.norb[isym-1];
1059 norbj = inforb_.norb[jsym-1];
1060 if(norbi>0 && norbj>0)
1061 FSYM(autpv)(&isym,&jsym,&cmo[inforb_.icmo[isym-1]],
1062 &cmo[inforb_.icmo[jsym-1]], lr_data.resb,
1063 inforb_.nbas,&inforb_.nbast,fmatb,
1064 inforb_.norb,&inforb_.norbt,
1065 work, lwork);
1066 }
1067 daxpy_(&inforb_.n2orbx,&TWOR,fmata,&ONEI,fmatc,&ONEI);
1068 if (inforb_.nasht > 0) {
1069 if (*trplet) {
1070 daxpy_(&inforb_.n2orbx,&MONER,fmatb,&ONEI,fmato,&ONEI);
1071 } else {
1072 daxpy_(&inforb_.n2orbx,&ONER,fmatb,&ONEI,fmato,&ONEI);
1073 }
1074 daxpy_(&inforb_.n2orbx,&MONER,fmata,&ONEI,fmato,&ONEI);
1075 }
1076 free(fmata);
1077 free(fmatb);
1078 free(lr_data.dmata);
1079 free(lr_data.dmatb);
1080 free(lr_data.resa);
1081 free(lr_data.kappaa);
1082 free(lr_data.kappab);
1083 free(lr_data.dtgaoa);
1084 free(lr_data.dtgaob);
1085 if (*iprint>0) {
1086 times(&endtm);
1087 utm = endtm.tms_utime-starttm.tms_utime;
1088 fort_print(" Electrons: %f(%9.1g): LR-DFT evaluation time: %9.1f s",
1089 electrons, (double)(electrons-(integer)(electrons+0.5)),
1090 utm/(double)sysconf(_SC_CLK_TCK));
1091 }
1092 }
1093
1094 /* =================================================================== */
1095 /* BLOCKED PROPERTY EVALUATORS */
1096 /* =================================================================== */
1097 static void
distribute_lda_bl(DftIntegratorBl * grid,integer bllen,integer blstart,integer blend,real * RESTRICT tmp,real * RESTRICT dR,real * RESTRICT excmat)1098 distribute_lda_bl(DftIntegratorBl *grid, integer bllen, integer blstart, integer blend,
1099 real * RESTRICT tmp, real *RESTRICT dR,
1100 real * RESTRICT excmat)
1101 {
1102 integer isym, jbl, j, ibl, i, k;
1103 real * RESTRICT aos = grid->atv;
1104
1105 for(isym=0; isym<grid->nsym; isym++) {
1106 integer (*RESTRICT blocks)[2] = BASBLOCK(grid,isym);
1107 integer bl_cnt = grid->bas_bl_cnt[isym];
1108
1109 for(jbl=0; jbl<bl_cnt; jbl++)
1110 for(j=blocks[jbl][0]-1; j<blocks[jbl][1]; j++) {
1111 integer joff = j*bllen;
1112 for(k=blstart; k<blend; k++)
1113 tmp[k+joff] = aos[k+joff]*dR[k];
1114 }
1115
1116 for(jbl=0; jbl<bl_cnt; jbl++) {
1117 for(j=blocks[jbl][0]-1; j<blocks[jbl][1]; j++) {
1118 real *RESTRICT tmpj = tmp + j*bllen; /* jth orbital */
1119 real *RESTRICT e_jcol = excmat + j*inforb_.nbast;
1120 for(ibl=0; ibl<bl_cnt; ibl++) {
1121 integer top = blocks[ibl][1] < j
1122 ? blocks[ibl][1] : j;
1123 for(i=blocks[ibl][0]-1; i<top; i++) {
1124 real *RESTRICT aosi = aos + i*bllen; /* ith orbital */
1125 for(k=blstart; k<blend; k++)
1126 e_jcol[i] += aosi[k]*tmpj[k];
1127 }
1128 }
1129 for(k=blstart; k<blend; k++)
1130 e_jcol[j] += 0.5*aos[k+j*bllen]*tmpj[k];
1131 }
1132 }
1133 }
1134 }
1135
1136 static void
distribute_gga_bl(DftIntegratorBl * grid,integer bllen,integer blstart,integer blend,real * RESTRICT tmp,real * RESTRICT dR,real * RESTRICT dZ,real * RESTRICT excmat)1137 distribute_gga_bl(DftIntegratorBl *grid, integer bllen, integer blstart, integer blend,
1138 real * RESTRICT tmp, real *RESTRICT dR, real *RESTRICT dZ,
1139 real * RESTRICT excmat)
1140 {
1141 integer isym, jbl, j, ibl, i, k;
1142 real * RESTRICT aox = grid->atv+bllen*inforb_.nbast;
1143 real * RESTRICT aoy = grid->atv+bllen*inforb_.nbast*2;
1144 real * RESTRICT aoz = grid->atv+bllen*inforb_.nbast*3;
1145 real * RESTRICT aos = grid->atv;
1146
1147 for(isym=0; isym<grid->nsym; isym++) {
1148 integer (*RESTRICT blocks)[2] = BASBLOCK(grid,isym);
1149 integer nblocks = grid->bas_bl_cnt[isym];
1150 for(jbl=0; jbl<nblocks; jbl++)
1151 for(j=blocks[jbl][0]-1; j<blocks[jbl][1]; j++) {
1152 integer joff = j*bllen;
1153 for(k=0; k<bllen; k++)
1154 tmp[k+joff] =
1155 dR[k]* aos[k+j*bllen] +
1156 dZ[k]*(aox[k+j*bllen]*grid->g.rad.a[k][0]+
1157 aoy[k+j*bllen]*grid->g.rad.a[k][1]+
1158 aoz[k+j*bllen]*grid->g.rad.a[k][2]);
1159 }
1160
1161 for(jbl=0; jbl<nblocks; jbl++) {
1162 for(j=blocks[jbl][0]-1; j<blocks[jbl][1]; j++) {
1163 real *RESTRICT tmpj = tmp + j*bllen; /* jth orbital */
1164 real *RESTRICT e_jcol = excmat + j*inforb_.nbast;
1165 for(ibl=0; ibl<nblocks; ibl++) {
1166 for(i=blocks[ibl][0]-1; i<blocks[ibl][1]; i++) {
1167 for(k=0; k<bllen; k++)
1168 e_jcol[i] += aos[k+i*bllen]*tmpj[k];
1169 }
1170 }
1171 }
1172 }
1173 }
1174 }
1175
1176 /* =================================================================== */
1177 /* blocked density and KS evaluation */
1178 /* =================================================================== */
1179
1180 struct ks_data {
1181 real* excmat;
1182 real* dR;
1183 real* dZ;
1184 real energy;
1185 };
1186 static void
kohn_sham_cb_b_lda(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,struct ks_data * data)1187 kohn_sham_cb_b_lda(DftIntegratorBl *grid, real * RESTRICT tmp,
1188 integer bllen, integer blstart, integer blend,
1189 struct ks_data* data)
1190 {
1191 FirstDrv drvs;
1192 integer i;
1193 real * RESTRICT excmat = data->excmat;
1194 real * RESTRICT dR = data->dR;
1195 FunDensProp dp = { 0 };
1196
1197 assert(grid->ntypso >0);
1198 for(i=blstart; i<bllen; i++) {
1199 real weight = grid->weight[grid->curr_point+i];
1200 dp.rhoa = dp. rhob = 0.5*grid->r.rho[i];
1201 data->energy += selected_func->func(&dp)*weight;
1202 dftpot0_(&drvs, &weight, &dp);
1203 dR[i] = 2*drvs.fR;
1204 }
1205
1206 distribute_lda_bl(grid, bllen, blstart, blend, tmp, dR, excmat);
1207 }
1208
1209 static void
kohn_sham_cb_b_gga(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,struct ks_data * data)1210 kohn_sham_cb_b_gga(DftIntegratorBl* grid, real * RESTRICT tmp,
1211 integer bllen, integer blstart, integer blend,
1212 struct ks_data* data)
1213 {
1214 FirstDrv drvs;
1215 integer i;
1216 real * RESTRICT excmat = data->excmat;
1217 real * RESTRICT dR = data->dR;
1218 real * RESTRICT dZ = data->dZ;
1219 FunDensProp dp = { 0 };
1220
1221 assert(grid->ntypso >0);
1222 for(i=0; i<bllen; i++) {
1223 real weight = grid->weight[grid->curr_point+i];
1224 dp.grada = 0.5*sqrt(grid->g.grad[i][0]*grid->g.grad[i][0]+
1225 grid->g.grad[i][1]*grid->g.grad[i][1]+
1226 grid->g.grad[i][2]*grid->g.grad[i][2]);
1227 dp. rhoa = dp. rhob = 0.5*grid->r.rho[i];
1228 dp.gradb = dp.grada;
1229 dp.gradab = dp.grada*dp.gradb;
1230 data->energy += selected_func->func(&dp)*weight;
1231 dftpot0_(&drvs, &weight, &dp);
1232 dR[i] = drvs.fR;
1233 dZ[i] = drvs.fZ/dp.grada;
1234 }
1235
1236 distribute_gga_bl(grid, bllen, blstart, blend, tmp, dR, dZ, excmat);
1237 }
1238
1239 /* dft_kohn_sham:
1240 compute Fock matrix ksm corresponding to given density matrix dmat.
1241 fast version - uses memory bandwidth-efficient algorithm.
1242 */
1243 void
FSYM2(dft_kohn_shamf)1244 FSYM2(dft_kohn_shamf)(real* dmat, real* ksm, real* edfty,
1245 real* work, integer *lwork, integer* iprint)
1246 {
1247 integer nbast2, i, j;
1248 struct tms starttm, endtm; clock_t utm;
1249 real electrons;
1250 struct ks_data ds;
1251
1252 nbast2 = inforb_.nbast*inforb_.nbast;
1253 ds.excmat = calloc(nbast2, sizeof(real));
1254 ds.dR = dal_malloc(DFT_BLLEN*sizeof(real));
1255 ds.dZ = dal_malloc(DFT_BLLEN*sizeof(real));
1256
1257 times(&starttm);
1258 ds.energy = 0.0;
1259
1260 electrons = dft_integrate_ao_bl(1, dmat, work, lwork, iprint, 0,
1261 (DftBlockCallback)
1262 (selected_func->is_gga() ?
1263 kohn_sham_cb_b_gga : kohn_sham_cb_b_lda),
1264 &ds);
1265
1266 for(i=0; i<inforb_.nbast; i++) {
1267 integer ioff = i*inforb_.nbast;
1268 for(j=0; j<i; j++) {
1269 integer joff = j*inforb_.nbast;
1270 real averag = 0.5*(ds.excmat[i+joff] + ds.excmat[j+ioff]);
1271 ds.excmat[i+joff] = ds.excmat[j+ioff] = averag;
1272 }
1273 }
1274
1275 *edfty=ds.energy;
1276 daxpy_(&inforb_.n2basx, &ONER, ds.excmat, &ONEI, ksm, &ONEI);
1277 if(KOHNSH_DEBUG) {
1278 fort_print("kohn sham matrix");
1279 outmat_(ds.excmat,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
1280 &inforb_.nbast, &inforb_.nbast);
1281 }
1282
1283 free(ds.excmat);
1284 free(ds.dR);
1285 free(ds.dZ);
1286 if (*iprint>0) {
1287 times(&endtm);
1288 utm = endtm.tms_utime-starttm.tms_utime;
1289 fort_print(" Electrons: %11.7f %7.1g: Energy %f KS/B time: %9.1f s",
1290 electrons, (electrons-2.0*inforb_.nrhft)/(2.0*inforb_.nrhft),
1291 ds.energy, utm/(double)sysconf(_SC_CLK_TCK));
1292 }
1293 }
1294
1295 /* ------------------------------------------------------------------- */
1296 /* ------ blocked DFT LINEAR RESPONSE CONTRIBUTION EVALUATOR --------- */
1297 /* ------------------------------------------------------------------- */
1298
1299 typedef struct {
1300 real* dmat, *kappa, *res;
1301 real* dtgao;
1302 real* vt; /* dimensioned [bllen] for LDA, [bllen][4] for GGA */
1303 integer trplet, ksymop;
1304 integer vecs_in_batch;
1305 } LinRespBlData;
1306
1307 static void
lin_resp_cb_b_lda(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,LinRespBlData * data)1308 lin_resp_cb_b_lda(DftIntegratorBl* grid, real * RESTRICT tmp,
1309 integer bllen, integer blstart, integer blend,
1310 LinRespBlData* data)
1311 {
1312 real * RESTRICT aos = grid->atv;
1313 real * RESTRICT excmat = data->res;
1314 real (* RESTRICT vt) = data->vt; /* [bllen][4] */
1315 integer ibl, i, jbl, j, k, isym, ivec;
1316 FunDensProp dp = { 0 };
1317 integer blen = bllen;
1318
1319 for(ivec=0; ivec<data->vecs_in_batch; ivec++) {
1320 /* compute vector of transformed densities vt */
1321 FSYM2(getexp_blocked_lda)(&data->ksymop, data->kappa + ivec*inforb_.n2basx,
1322 grid->atv, grid->bas_bl_cnt, grid->basblocks,
1323 &grid->shl_bl_cnt, tmp, &blen, vt);
1324
1325 for(i=blstart; i<blend; i++) {
1326 SecondDrv vxc;
1327 real weight = grid->weight[grid->curr_point+i];
1328 dp.rhoa = dp.rhob = 0.5*grid->r.rho[i];
1329 dftpot1_(&vxc, &weight, &dp, &data->trplet);
1330 vt[i] = vxc.fRR*vt[i]*2;
1331 }
1332
1333 for(isym=0; isym<grid->nsym; isym++) {
1334 integer (*RESTRICT iblocks)[2] = BASBLOCK(grid,isym);
1335 integer ibl_cnt = grid->bas_bl_cnt[isym];
1336
1337 for(ibl=0; ibl<ibl_cnt; ibl++)
1338 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
1339 integer ioff = i*bllen;
1340 for(k=blstart; k<blend; k++)
1341 tmp[k+ioff] = aos[k+ioff]*vt[k];
1342 }
1343
1344 for(ibl=0; ibl<ibl_cnt; ibl++) {
1345 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
1346 integer ioff = i*inforb_.nbast + ivec*inforb_.n2basx;
1347 integer jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
1348 integer (*RESTRICT jblocks)[2] = BASBLOCK(grid,jsym);
1349 integer jbl_cnt = grid->bas_bl_cnt[jsym];
1350 real *RESTRICT tmpi = &tmp[i*bllen];
1351 if (isym<jsym) continue;
1352 for(jbl=0; jbl<jbl_cnt; jbl++) {
1353 integer jtop = min(jblocks[jbl][1],i);
1354 for(j=jblocks[jbl][0]-1; j<jtop; j++) {
1355 for(k=blstart; k<blend; k++)
1356 excmat[j+ioff] +=
1357 aos[k+j*bllen]*tmpi[k];
1358 }
1359 }
1360 for(k=blstart; k<blend; k++)
1361 excmat[i+ioff] += aos[k+i*bllen]*tmpi[k]*0.5;
1362 }
1363 }
1364 }
1365 }
1366 }
1367
1368 static void
lin_resp_cb_b_gga(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,LinRespBlData * data)1369 lin_resp_cb_b_gga(DftIntegratorBl* grid, real * RESTRICT tmp,
1370 integer bllen, integer blstart, integer blend,
1371 LinRespBlData* data)
1372 {
1373 integer ibl, i, jbl, j, k, isym, ivec;
1374 real (* RESTRICT vt3)[4] = (real(*)[4])data->vt; /* [bllen][4] */
1375 real * RESTRICT aos = grid->atv;
1376 real * RESTRICT aox = grid->atv+bllen*inforb_.nbast;
1377 real * RESTRICT aoy = grid->atv+bllen*inforb_.nbast*2;
1378 real * RESTRICT aoz = grid->atv+bllen*inforb_.nbast*3;
1379 real * RESTRICT excmat = data->res;
1380 FunDensProp dp = { 0 };
1381 integer blen = bllen;
1382
1383 for(ivec=0; ivec<data->vecs_in_batch; ivec++) {
1384 /* compute vector of transformed densities and dens. gradients vt3 */
1385 FSYM2(getexp_blocked_gga)(&data->ksymop,data->kappa + ivec*inforb_.n2basx,
1386 grid->atv, grid->bas_bl_cnt,
1387 grid->basblocks, &grid->shl_bl_cnt, tmp,
1388 &blen, vt3);
1389 for(i=blstart; i<blend; i++) {
1390 SecondDrv vxc;
1391 real facr, facg;
1392 real weight = grid->weight[grid->curr_point+i];
1393 real ngrad = sqrt(grid->g.grad[i][0]*grid->g.grad[i][0]+
1394 grid->g.grad[i][1]*grid->g.grad[i][1]+
1395 grid->g.grad[i][2]*grid->g.grad[i][2]);
1396 real brg, brz, b0 = vt3[i][0];
1397 if(ngrad<1e-15|| grid->r.rho[i]<1e-15) {
1398 vt3[i][0] = vt3[i][1] = vt3[i][2] = vt3[i][3] = 0;
1399 continue;
1400 }
1401 brg = (vt3[i][1]*grid->g.grad[i][0] +
1402 vt3[i][2]*grid->g.grad[i][1] +
1403 vt3[i][3]*grid->g.grad[i][2]);
1404 brz = brg/ngrad;
1405 dp. rhoa = dp. rhob = 0.5*grid->r.rho[i];
1406 dp.grada = dp.gradb = 0.5*ngrad;
1407 dp.gradab = dp.grada*dp.gradb;
1408 dftpot1_(&vxc, &weight, &dp, &data->trplet);
1409 facr = vxc.fRZ*b0 + (vxc.fZZ-vxc.fZ/ngrad)*brz + vxc.fZG*brg;
1410 facr = facr/ngrad + (vxc.fRG*b0+vxc.fZG*brz +vxc.fGG*brg);
1411 facg = vxc.fZ/ngrad + vxc.fG;
1412 vt3[i][0] = vxc.fRR*b0 + vxc.fRZ*brz+ vxc.fRG*brg;
1413 vt3[i][1] = (grid->g.grad[i][0]*facr + facg*vt3[i][1])*2;
1414 vt3[i][2] = (grid->g.grad[i][1]*facr + facg*vt3[i][2])*2;
1415 vt3[i][3] = (grid->g.grad[i][2]*facr + facg*vt3[i][3])*2;
1416 }
1417
1418 for(isym=0; isym<grid->nsym; isym++) {
1419 integer (*RESTRICT iblocks)[2] = BASBLOCK(grid,isym);
1420 integer ibl_cnt = grid->bas_bl_cnt[isym];
1421
1422 for(ibl=0; ibl<ibl_cnt; ibl++) {
1423 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
1424 real *RESTRICT g0i = &aos[i*bllen];
1425 real *RESTRICT gxi = &aox[i*bllen];
1426 real *RESTRICT gyi = &aoy[i*bllen];
1427 real *RESTRICT gzi = &aoz[i*bllen];
1428 integer ioff = i*inforb_.nbast + ivec*inforb_.n2basx;
1429 integer jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
1430 integer (*RESTRICT jblocks)[2] = BASBLOCK(grid,jsym);
1431 integer jbl_cnt = grid->bas_bl_cnt[jsym];
1432 for(k=blstart; k<blend; k++)
1433 tmp[k] = (vt3[k][0]*g0i[k] +
1434 vt3[k][1]*gxi[k] +
1435 vt3[k][2]*gyi[k] +
1436 vt3[k][3]*gzi[k]);
1437 for(jbl=0; jbl<jbl_cnt; jbl++) {
1438 integer jtop = jblocks[jbl][1];
1439 for(j=jblocks[jbl][0]-1; j<jtop; j++) {
1440 real *RESTRICT g0j = &aos[j*bllen];
1441 integer bllen = blend - blstart;
1442 real s = ddot_(&bllen, g0j+blstart, &ONEI, tmp+blstart, &ONEI);
1443 excmat[j+ioff] += s;
1444 }
1445 }
1446 }
1447 }
1448 }
1449 }
1450 }
1451
1452 /* dft_lin_respf_:
1453 Main Linear Response evaluator.
1454 FMAT(NORBT,NORBT,NOSIM) - result added to FMAT. Must not be referenced on slaves.
1455 cmo -const
1456 zymat(NORBT,NORBT,NOSIM) - const response vector.
1457 trplet - triplet excitation? (bool)
1458 NOSIM - number of simultaneously transformed response vectors.
1459 */
1460 void
FSYM2(dft_lin_respf)1461 FSYM2(dft_lin_respf)(integer *nosim, real* fmat, real *cmo, real *zymat,
1462 integer *trplet, integer *ksymop,
1463 real* work, integer* lwork, integer* iprint)
1464 {
1465 /* MAX_VEC determines the number of simultaneusly transformed
1466 vectors. Set it to a small mumber (eg. 3) - the only operation
1467 that is saved is evaluation of orbitals and density and density
1468 gradients which is usually small compared to the time spent in
1469 actual vector transformation. Larger values will only increate
1470 memory utilization without positive impact on performance.
1471 FIXME: consider using work for this purpose. */
1472 static const integer MAX_VEC = 5;
1473 static const real DP5R = 0.5;
1474 struct tms starttm, endtm; clock_t utm;
1475 real electrons = 0;
1476 LinRespBlData lr_data; /* linear response data */
1477 real dummy;
1478 integer i, j, ivec, jvec;
1479 integer max_vecs;
1480
1481 /* WARNING: NO work MAY BE done before syncing slaves! */
1482 dft_wake_slaves((DFTPropEvalMaster)FSYM2(dft_lin_respf)); /* NO-OP in serial */
1483 dft_lin_resp_sync_slaves(cmo,nosim,&zymat,trplet,ksymop,
1484 &work, *lwork); /* NO-OP in serial */
1485
1486 times(&starttm);
1487 max_vecs = *nosim>MAX_VEC ? MAX_VEC : *nosim;
1488 lr_data.dmat = dal_malloc(inforb_.n2basx*sizeof(real));
1489 lr_data.res = dal_malloc(inforb_.n2basx*sizeof(real)*max_vecs);
1490 lr_data.kappa = dal_malloc(inforb_.n2basx*sizeof(real)*max_vecs);
1491 lr_data.vt = dal_malloc(DFT_BLLEN*4 *sizeof(real));
1492 lr_data.dtgao = dal_malloc(inforb_.nbast *sizeof(real));
1493 lr_data.trplet= *trplet;
1494 lr_data.ksymop= *ksymop;
1495 FSYM2(dft_get_ao_dens_mat)(cmo, lr_data.dmat, work, lwork);
1496
1497 for(ivec=0; ivec<*nosim; ivec+=max_vecs) {
1498 integer sz;
1499 lr_data.vecs_in_batch = ivec + max_vecs > *nosim ? *nosim - ivec : max_vecs;
1500 sz = lr_data.vecs_in_batch * inforb_.n2basx;
1501 FSYM(dzero)(lr_data.kappa, &sz);
1502 for(jvec=0; jvec<lr_data.vecs_in_batch; jvec++){
1503 FSYM(deq27)(cmo,zymat+(ivec+jvec)*inforb_.n2orbx,&dummy,
1504 lr_data.kappa+jvec*inforb_.n2basx, &dummy,work,lwork);
1505 dscal_(&inforb_.n2basx,&DP5R,lr_data.kappa+jvec*inforb_.n2basx,&ONEI);
1506 }
1507 FSYM(dzero)(lr_data.res, &sz);
1508 electrons = dft_integrate_ao_bl(1, lr_data.dmat, work, lwork, iprint, 0,
1509 (DftBlockCallback)
1510 (selected_func->is_gga() ?
1511 lin_resp_cb_b_gga : lin_resp_cb_b_lda),
1512 &lr_data);
1513 #ifdef VAR_MPI
1514 dft_lin_resp_collect_info(lr_data.vecs_in_batch,lr_data.res, work, *lwork);
1515 if(fmat == NULL) /* The transformations below are done only by master. */
1516 continue;
1517 #endif
1518 if(DFTLR_DEBUG) {
1519 fort_print("AO Fock matrix contribution in dft_lin_respf");
1520 outmat_(lr_data.res,&ONEI,&inforb_.nbast,&ONEI,&inforb_.nbast,
1521 &inforb_.nbast, &inforb_.nbast);
1522 }
1523 for(jvec=0; jvec<lr_data.vecs_in_batch; jvec++){
1524 for(i=0; i<inforb_.nbast; i++) {
1525 integer ioff = i*inforb_.nbast + jvec*inforb_.n2basx;
1526 for(j=0; j<i; j++) {
1527 integer joff = j*inforb_.nbast + jvec*inforb_.n2basx;
1528 real averag = 0.5*(lr_data.res[i+joff] +
1529 lr_data.res[j+ioff]);
1530 lr_data.res[i+joff] = lr_data.res[j+ioff] = averag;
1531 }
1532 }
1533 /* transform to MO Fock matrix contribution */
1534 FSYM(lrao2mo)(cmo, &lr_data.ksymop, lr_data.res+jvec*inforb_.n2basx,
1535 fmat+(ivec+jvec)*inforb_.n2orbx, work, lwork);
1536 }
1537 if(DFTLR_DEBUG) {
1538 fort_print("MO Fock matrix contribution in dft_lin_resp");
1539 outmat_(fmat,&ONEI,&inforb_.norbt,&ONEI,&inforb_.norbt,
1540 &inforb_.norbt, &inforb_.norbt);
1541 }
1542 }
1543
1544 free(lr_data.dmat);
1545 free(lr_data.res);
1546 free(lr_data.kappa);
1547 free(lr_data.vt);
1548 free(lr_data.dtgao);
1549 if (*iprint>0) {
1550 times(&endtm);
1551 utm = endtm.tms_utime-starttm.tms_utime;
1552 fort_print(" Electrons: %f(%9.3g): LR-DFT*%d evaluation time: %9.1f s",
1553 electrons, (double)(electrons-(integer)(electrons+0.5)), *nosim,
1554 utm/(double)sysconf(_SC_CLK_TCK));
1555 }
1556 }
1557
1558 void
dft_lin_respf_slave(real * work,integer * lwork,integer * iprint)1559 dft_lin_respf_slave(real* work, integer* lwork, integer* iprint)
1560 {
1561 real *cmo = malloc(inforb_.norbt*inforb_.nbast*sizeof(real)); /* IN */
1562 integer trplet; /* IN: will be synced from master */
1563 integer ksymop, nosim;
1564 FSYM2(dft_lin_respf)(&nosim, NULL, cmo, NULL,
1565 &trplet, &ksymop, work, lwork, iprint);
1566 free(cmo);
1567 }
1568
1569
1570 /*====================================================================*/
1571 /* BLOCKED OPEN-SHELL CODE */
1572 /* Z. Rinkevicius, June 2008 */
1573 /*====================================================================*/
1574
1575 /* Kohn-Sham evaluator */
1576
1577 struct ks_data_ab {
1578 real *ksma, *ksmb;
1579 real energy;
1580 real *dRa;
1581 real *dRb;
1582 real (*dZa);
1583 real (*dZb);
1584 real (*dZab);
1585 real *tmpa, *tmpb;
1586 };
1587
1588
1589 #if defined(VAR_MPI)
1590 void
dft_kohn_shamab_b_slave(real * work,integer * lwork,integer * iprint)1591 dft_kohn_shamab_b_slave(real* work, integer* lwork, integer* iprint)
1592 {
1593 real* dmat = malloc(2*inforb_.n2basx*sizeof(real));
1594 FSYM2(dft_kohn_shamab_b)(dmat, NULL, NULL, work, lwork, iprint);
1595 free(dmat);
1596 }
1597
1598 extern void FSYM(dftintbcast)(void);
1599
1600 void
dft_kohn_shamab_b_sync_slaves(real * dmat)1601 dft_kohn_shamab_b_sync_slaves(real* dmat)
1602 {
1603 FSYM(dftintbcast)();
1604 MPI_Bcast(dmat, 2*inforb_.n2basx,MPI_DOUBLE,
1605 MASTER_NO, MPI_COMM_WORLD);
1606 }
1607
1608 static void
dft_kohn_shamab_b_collect(real * energy,real * ksm,real * work,integer lwork)1609 dft_kohn_shamab_b_collect(real *energy, real *ksm, real* work, integer lwork)
1610 {
1611 real tmp = *energy;
1612 int sz = 0;
1613 MPI_Comm_size(MPI_COMM_WORLD, &sz);
1614 if(sz <=1) return;
1615 sz = 2*inforb_.n2basx;
1616 integer sz_ftn = sz;
1617 CHECK_WRKMEM(sz, lwork);
1618 dcopy_(&sz_ftn, ksm,&ONEI, work, &ONEI);
1619 MPI_Reduce(work, ksm, sz, MPI_DOUBLE, MPI_SUM,
1620 MASTER_NO, MPI_COMM_WORLD);
1621 MPI_Reduce(&tmp, energy, 1, MPI_DOUBLE, MPI_SUM,
1622 MASTER_NO, MPI_COMM_WORLD);
1623 }
1624 #endif
1625
1626 static void
distribute_lda_ab_bl(DftIntegratorBl * grid,integer bllen,integer blstart,integer blend,real * RESTRICT tmpa,real * RESTRICT tmpb,real * RESTRICT dRa,real * RESTRICT dRb,real * RESTRICT exc_a,real * RESTRICT exc_b)1627 distribute_lda_ab_bl(DftIntegratorBl *grid, integer bllen, integer blstart, integer blend,
1628 real * RESTRICT tmpa,real * RESTRICT tmpb,
1629 real *RESTRICT dRa, real *RESTRICT dRb,
1630 real * RESTRICT exc_a, real * RESTRICT exc_b)
1631 {
1632 integer isym, jbl, j, ibl, i, k;
1633 real * RESTRICT aos = grid->atv;
1634
1635 for(isym=0; isym<grid->nsym; isym++) {
1636 integer (*RESTRICT blocks)[2] = BASBLOCK(grid,isym);
1637 integer bl_cnt = grid->bas_bl_cnt[isym];
1638
1639 for(jbl=0; jbl<bl_cnt; jbl++)
1640 for(j=blocks[jbl][0]-1; j<blocks[jbl][1]; j++) {
1641 integer joff = j*bllen;
1642 real *RESTRICT e_jcola = exc_a+ j*inforb_.nbast;
1643 real *RESTRICT e_jcolb = exc_b+ j*inforb_.nbast;
1644 for(k=blstart; k<blend; k++) {
1645 tmpa[k] = aos[k+joff]*dRa[k];
1646 tmpb[k] = aos[k+joff]*dRb[k];
1647 }
1648 for(ibl=0; ibl<bl_cnt; ibl++) {
1649 integer top = blocks[ibl][1] < j
1650 ? blocks[ibl][1] : j;
1651 for(i=blocks[ibl][0]-1; i<top; i++) {
1652 real *RESTRICT aosi = aos + i*bllen; /* ith orbital */
1653 for(k=blstart; k<blend; k++) {
1654 e_jcola[i] += aosi[k]*tmpa[k];
1655 e_jcolb[i] += aosi[k]*tmpb[k];
1656 }
1657 }
1658 }
1659 for(k=blstart; k<blend; k++) {
1660 e_jcola[j] += 0.5*aos[k+j*bllen]*tmpa[k];
1661 e_jcolb[j] += 0.5*aos[k+j*bllen]*tmpb[k];
1662 }
1663 }
1664 }
1665 }
1666
1667 static void
distribute_gga_ab_bl(DftIntegratorBl * grid,integer bllen,integer blstart,integer blend,real * RESTRICT tmpa,real * RESTRICT tmpb,real * RESTRICT dRa,real * RESTRICT dRb,real * RESTRICT dZa,real * RESTRICT dZb,real * RESTRICT dZab,real * RESTRICT exc_a,real * RESTRICT exc_b)1668 distribute_gga_ab_bl(DftIntegratorBl *grid, integer bllen, integer blstart, integer blend,
1669 real * RESTRICT tmpa, real * RESTRICT tmpb,
1670 real *RESTRICT dRa, real *RESTRICT dRb,
1671 real *RESTRICT dZa, real *RESTRICT dZb,
1672 real *RESTRICT dZab,
1673 real * RESTRICT exc_a, real * RESTRICT exc_b)
1674 {
1675 integer isym, jbl, j, ibl, i, k;
1676 real * RESTRICT aox = grid->atv+bllen*inforb_.nbast;
1677 real * RESTRICT aoy = grid->atv+bllen*inforb_.nbast*2;
1678 real * RESTRICT aoz = grid->atv+bllen*inforb_.nbast*3;
1679 real * RESTRICT aos = grid->atv;
1680
1681 for(isym=0; isym<grid->nsym; isym++) {
1682 integer (*RESTRICT blocks)[2] = BASBLOCK(grid,isym);
1683 integer nblocks = grid->bas_bl_cnt[isym];
1684 for(jbl=0; jbl<nblocks; jbl++)
1685 for(j=blocks[jbl][0]-1; j<blocks[jbl][1]; j++) {
1686 integer joff = j*bllen;
1687 real *RESTRICT e_jcola = exc_a + j*inforb_.nbast;
1688 real *RESTRICT e_jcolb = exc_b + j*inforb_.nbast;
1689 for(k=blstart; k<blend; k++) {
1690 tmpa[k] =
1691 dRa[k]* aos[k+joff]+
1692 dZa[k]*(aox[k+joff]*grid->g.rad.a[k][0]+
1693 aoy[k+joff]*grid->g.rad.a[k][1]+
1694 aoz[k+joff]*grid->g.rad.a[k][2])+
1695 dZab[k]*(aox[k+joff]*grid->g.rad.b[k][0]+
1696 aoy[k+joff]*grid->g.rad.b[k][1]+
1697 aoz[k+joff]*grid->g.rad.b[k][2]);
1698 tmpb[k] =
1699 dRb[k]* aos[k+joff]+
1700 dZb[k]*(aox[k+joff]*grid->g.rad.b[k][0]+
1701 aoy[k+joff]*grid->g.rad.b[k][1]+
1702 aoz[k+joff]*grid->g.rad.b[k][2])+
1703 dZab[k]*(aox[k+joff]*grid->g.rad.a[k][0]+
1704 aoy[k+joff]*grid->g.rad.a[k][1]+
1705 aoz[k+joff]*grid->g.rad.a[k][2]);
1706 }
1707 for(ibl=0; ibl<nblocks; ibl++) {
1708 for(i=blocks[ibl][0]-1; i<blocks[ibl][1]; i++) {
1709 for(k=blstart; k<blend; k++) {
1710 e_jcola[i] += aos[k+i*bllen]*tmpa[k];
1711 e_jcolb[i] += aos[k+i*bllen]*tmpb[k];
1712 }
1713 }
1714 }
1715 }
1716 }
1717 }
1718
1719 static void
kohn_shamab_cb_b_lda(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,struct ks_data_ab * data)1720 kohn_shamab_cb_b_lda(DftIntegratorBl *grid, real * RESTRICT tmp,
1721 integer bllen, integer blstart, integer blend,
1722 struct ks_data_ab* data)
1723 {
1724
1725 FunFirstFuncDrv drvs;
1726 integer i;
1727 real * RESTRICT exc_a = data->ksma;
1728 real * RESTRICT exc_b = data->ksmb;
1729 real * RESTRICT dRa = data->dRa;
1730 real * RESTRICT dRb = data->dRb;
1731 FunDensProp dp = { 0 };
1732
1733 assert(grid->ntypso >0);
1734 for(i=blstart; i<blend; i++) {
1735 real weight = grid->weight[grid->curr_point+i];
1736 dp.rhoa = grid->r.ho.a[i];
1737 dp.rhob = grid->r.ho.b[i];
1738 data->energy += selected_func->func(&dp)*weight;
1739 drv1_clear(&drvs);
1740 selected_func->first(&drvs, weight, &dp);
1741 dRa[i] = 2.0*drvs.df1000;
1742 dRb[i] = 2.0*drvs.df0100;
1743 }
1744 distribute_lda_ab_bl(grid, bllen, blstart, blend, data->tmpa, data->tmpb,
1745 dRa, dRb, exc_a, exc_b);
1746 }
1747
1748 static void
kohn_shamab_cb_b_gga(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,struct ks_data_ab * data)1749 kohn_shamab_cb_b_gga(DftIntegratorBl* grid, real * RESTRICT tmp,
1750 integer bllen, integer blstart, integer blend,
1751 struct ks_data_ab* data)
1752 {
1753 FunFirstFuncDrv drvs;
1754 integer i;
1755 real * RESTRICT exc_a = data->ksma;
1756 real * RESTRICT exc_b = data->ksmb;
1757 real * RESTRICT dRa = data->dRa;
1758 real * RESTRICT dRb = data->dRb;
1759 real * RESTRICT dZa = data->dZa;
1760 real * RESTRICT dZb = data->dZb;
1761 real * RESTRICT dZab = data->dZab;
1762 FunDensProp dp = { 0 };
1763
1764 assert(grid->ntypso >0);
1765 for(i=blstart; i<blend; i++) {
1766 real weight = grid->weight[grid->curr_point+i];
1767 dp.rhoa = grid->r.ho.a[i];
1768 dp.rhob = grid->r.ho.b[i];
1769 dp.grada = sqrt(grid->g.rad.a [i][0]*grid->g.rad.a [i][0]+
1770 grid->g.rad.a [i][1]*grid->g.rad.a [i][1]+
1771 grid->g.rad.a [i][2]*grid->g.rad.a [i][2]);
1772 dp.grada = fabs(dp.grada)>1e-40 ? dp.grada : 1e-40;
1773 dp.gradb = sqrt(grid->g.rad.b [i][0]*grid->g.rad.b [i][0]+
1774 grid->g.rad.b [i][1]*grid->g.rad.b [i][1]+
1775 grid->g.rad.b [i][2]*grid->g.rad.b [i][2]);
1776 dp.gradb = fabs(dp.gradb)>1e-40 ? dp.gradb : 1e-40;
1777 dp.gradab = grid->g.rad.a [i][0]*grid->g.rad.b [i][0]+
1778 grid->g.rad.a [i][1]*grid->g.rad.b [i][1]+
1779 grid->g.rad.a [i][2]*grid->g.rad.b [i][2];
1780 data->energy += selected_func->func(&dp)*weight;
1781 drv1_clear(&drvs);
1782 selected_func->first(&drvs, weight, &dp);
1783 dRa[i] = drvs.df1000;
1784 dRb[i] = drvs.df0100;
1785 dZa[i] = 2.0*drvs.df0010/dp.grada;
1786 dZb[i] = 2.0*drvs.df0001/dp.gradb;
1787 dZab[i] = 2.0*drvs.df00001;
1788 }
1789 distribute_gga_ab_bl(grid, bllen, blstart, blend, data->tmpa, data->tmpb,
1790 dRa, dRb, dZa, dZb, dZab, exc_a, exc_b);
1791 }
1792
1793 void
FSYM2(dft_kohn_shamab_b)1794 FSYM2(dft_kohn_shamab_b)(real* dmat, real* ksm, real *edfty,
1795 real* work, integer *lwork, integer* iprint)
1796 {
1797 integer i, j;
1798 integer sz;
1799 struct ks_data_ab result; /* res like result */
1800 struct tms starttm, endtm; clock_t utm;
1801 real electrons, exp_el = 2.0*inforb_.nrhft+inforb_.nasht;
1802
1803 /* parallel code insert */
1804 #if defined(VAR_MPI)
1805 dft_wake_slaves((DFTPropEvalMaster)FSYM2(dft_kohn_shamab_b));
1806 dft_kohn_shamab_b_sync_slaves(dmat);
1807 #endif
1808 /* memory allocation */
1809 result.ksma = calloc(2*inforb_.n2basx, sizeof(real));
1810 result.ksmb = result.ksma + inforb_.n2basx;
1811 result.dRa = dal_malloc(DFT_BLLEN*sizeof(real));
1812 result.dRb = dal_malloc(DFT_BLLEN*sizeof(real));
1813 result.dZa = dal_malloc(DFT_BLLEN*sizeof(real));
1814 result.dZb = dal_malloc(DFT_BLLEN*sizeof(real));
1815 result.dZab = dal_malloc(DFT_BLLEN*sizeof(real));
1816 result.tmpa = dal_malloc(DFT_BLLEN*sizeof(real));
1817 result.tmpb = dal_malloc(DFT_BLLEN*sizeof(real));
1818 result.energy = 0.0;
1819 /* start timer*/
1820 times(&starttm);
1821 /* integeration */
1822 electrons = dft_integrate_ao_bl(2, dmat, work, lwork, iprint, 0,
1823 (DftBlockCallback)(selected_func->is_gga() ?
1824 kohn_shamab_cb_b_gga:kohn_shamab_cb_b_lda),
1825 &result);
1826 /* parallel code insert */
1827 #if defined(VAR_MPI)
1828 /* collect data */
1829 dft_kohn_shamab_b_collect(&result.energy,result.ksma, work, *lwork);
1830 /* free memory and leave if slave*/
1831 if (edfty == NULL) {
1832 free(result.ksma);
1833 free(result.dRa);
1834 free(result.dRb);
1835 free(result.dZa);
1836 free(result.dZb);
1837 free(result.dZab);
1838 free(result.tmpa);
1839 free(result.tmpb);
1840 return;
1841 };
1842 #endif
1843 /* average Kohn-Sham matrices */
1844 for(i=0; i<inforb_.nbast; i++) {
1845 integer ioff = i*inforb_.nbast;
1846 for(j=0; j<i; j++) {
1847 integer joff = j*inforb_.nbast;
1848 real averag = 0.5*(result.ksma[i+joff] + result.ksma[j+ioff]);
1849 result.ksma[i+joff] = result.ksma[j+ioff] = averag;
1850 averag = 0.5*(result.ksmb[i+joff] + result.ksmb[j+ioff]);
1851 result.ksmb[i+joff] = result.ksmb[j+ioff] = averag;
1852 }
1853 }
1854 /* results */
1855 *edfty=result.energy;
1856 sz = 2*inforb_.n2basx;
1857 FSYM(daxpy)(&sz, &ONER, result.ksma, &ONEI, ksm, &ONEI);
1858 /* free memory */
1859 free(result.ksma);
1860 free(result.dRa);
1861 free(result.dRb);
1862 free(result.dZa);
1863 free(result.dZb);
1864 free(result.dZab);
1865 free(result.tmpa);
1866 free(result.tmpb);
1867 /* timings & print out*/
1868 if (*iprint>0) {
1869 times(&endtm);
1870 utm = endtm.tms_utime-starttm.tms_utime;
1871 fort_print(" Electrons: %11.7f %9.1g: Energy %f KS-AB/B time: %9.1f s",
1872 electrons, (electrons-exp_el)/exp_el,
1873 result.energy, utm/(double)sysconf(_SC_CLK_TCK));
1874 }
1875 }
1876
1877 /* Linear response contribution evaluator */
1878
1879 typedef struct {
1880 real* dmata, *dmatb;
1881 real* kappa_a, *kappa_b;
1882 real* res_a, *res_b;
1883 real* dtgaoa, *dtgaob;
1884 real* rhowa, *rhowb;
1885 real* vta, *vtb;
1886 real* tmpa, *tmpb;
1887 integer trplet, ksymop, vecs_in_batch;
1888 } LinResp_ab_BlData;
1889
1890
1891 #if defined(VAR_MPI)
1892 void
dft_lin_respab_b_slave(real * work,integer * lwork,integer * iprint)1893 dft_lin_respab_b_slave(real* work, integer* lwork, integer* iprint)
1894 {
1895 real *cmo = malloc(inforb_.norbt*inforb_.nbast*sizeof(real));
1896 integer trplet;
1897 integer ksymop, nosim;
1898 FSYM2(dft_lin_respab_b)(&nosim, NULL, NULL, cmo, NULL,
1899 &trplet, &ksymop, work, lwork, iprint);
1900 free(cmo);
1901 }
1902
1903 static void
dft_lin_respab_b_collect(integer nvec,real * fmata,real * fmatb,real * work,integer lwork)1904 dft_lin_respab_b_collect(integer nvec, real* fmata, real* fmatb, real *work, integer lwork)
1905 {
1906 int sz = 0;
1907 MPI_Comm_size(MPI_COMM_WORLD, &sz);
1908 if(sz<=1) return;
1909
1910 sz = nvec*inforb_.n2basx;
1911 integer sz_ftn =sz;
1912 if(sz>lwork) {
1913 CHECK_WRKMEM(inforb_.n2basx,lwork);
1914 for(sz=0; sz<nvec; sz++) {
1915 dcopy_(&inforb_.n2basx, fmata + sz*inforb_.n2basx,
1916 &ONEI, work, &ONEI);
1917 MPI_Reduce(work, fmata+sz*inforb_.n2basx, inforb_.n2basx,
1918 MPI_DOUBLE, MPI_SUM, MASTER_NO, MPI_COMM_WORLD);
1919 }
1920 for(sz=0; sz<nvec; sz++) {
1921 dcopy_(&inforb_.n2basx, fmatb + sz*inforb_.n2basx,
1922 &ONEI, work, &ONEI);
1923 MPI_Reduce(work, fmatb+sz*inforb_.n2basx, inforb_.n2basx,
1924 MPI_DOUBLE, MPI_SUM, MASTER_NO, MPI_COMM_WORLD);
1925 }
1926 } else {
1927 CHECK_WRKMEM(sz, lwork);
1928 dcopy_(&sz_ftn, fmata, &ONEI, work, &ONEI);
1929 MPI_Reduce(work, fmata, sz, MPI_DOUBLE, MPI_SUM,
1930 MASTER_NO, MPI_COMM_WORLD);
1931 dcopy_(&sz_ftn, fmatb, &ONEI, work, &ONEI);
1932 MPI_Reduce(work, fmatb, sz, MPI_DOUBLE, MPI_SUM,
1933 MASTER_NO, MPI_COMM_WORLD);
1934 }
1935 }
1936 #endif
1937
1938 static void
lin_resp_cb_b_lda_ab(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,LinResp_ab_BlData * data)1939 lin_resp_cb_b_lda_ab(DftIntegratorBl* grid, real * RESTRICT tmp,
1940 integer bllen, integer blstart, integer blend,
1941 LinResp_ab_BlData* data)
1942 {
1943 static const real MONER = -1.0;
1944 real * RESTRICT aos = grid->atv;
1945 real * RESTRICT rhowa = data->rhowa;
1946 real * RESTRICT rhowb = data->rhowb;
1947 real * RESTRICT exc_a = data->res_a;
1948 real * RESTRICT exc_b = data->res_b;
1949 real * RESTRICT vta = data->vta;
1950 real * RESTRICT vtb = data->vtb;
1951 real * RESTRICT tmpa = data->tmpa;
1952 real * RESTRICT tmpb = data->tmpb;
1953 integer ibl, i, jbl, j, k, isym, ivec;
1954 integer bl_size = DFT_BLLEN;
1955 FunSecondFuncDrv vxc;
1956 FunDensProp dp = { 0 };
1957 integer blen = bllen;
1958
1959
1960 for(ivec=0; ivec<data->vecs_in_batch; ivec++) {
1961 /* evaluate transformed densities */
1962 FSYM2(getexp_blocked_lda)(&data->ksymop, data->kappa_a + ivec*inforb_.n2basx,
1963 grid->atv, grid->bas_bl_cnt, grid->basblocks,
1964 &grid->shl_bl_cnt, tmp, &blen, rhowa);
1965 FSYM2(getexp_blocked_lda)(&data->ksymop, data->kappa_b + ivec*inforb_.n2basx,
1966 grid->atv, grid->bas_bl_cnt, grid->basblocks,
1967 &grid->shl_bl_cnt, tmp, &blen, rhowb);
1968 /* change sign if triplet response */
1969 if (data->trplet) FSYM(dscal)(&bl_size,&MONER,rhowb,&ONEI);
1970 for(i=blstart; i<blend; i++) {
1971 real weight = grid->weight[grid->curr_point+i];
1972 dp.rhoa = grid->r.ho.a[i];
1973 dp.rhob = grid->r.ho.b[i];
1974 drv2_clear(&vxc);
1975 selected_func->second(&vxc,weight,&dp);
1976 vta[i] = vxc.df2000*rhowa[i]+vxc.df1100*rhowb[i];
1977 vtb[i] = vxc.df0200*rhowb[i]+vxc.df1100*rhowa[i];
1978 }
1979 for(isym=0; isym<grid->nsym; isym++) {
1980 integer (*RESTRICT iblocks)[2] = BASBLOCK(grid,isym);
1981 integer ibl_cnt = grid->bas_bl_cnt[isym];
1982 for(ibl=0; ibl<ibl_cnt; ibl++)
1983 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
1984 integer ioff = i*bllen;
1985 for(k=blstart; k<blend; k++){
1986 tmpa[k+ioff] = aos[k+ioff]*vta[k];
1987 tmpb[k+ioff] = aos[k+ioff]*vtb[k];
1988 }
1989 }
1990 for(ibl=0; ibl<ibl_cnt; ibl++) {
1991 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
1992 integer ioff = i*inforb_.nbast + ivec*inforb_.n2basx;
1993 integer jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
1994 integer (*RESTRICT jblocks)[2] = BASBLOCK(grid,jsym);
1995 integer jbl_cnt = grid->bas_bl_cnt[jsym];
1996 real *RESTRICT tmpai = &tmpa[i*bllen];
1997 real *RESTRICT tmpbi = &tmpb[i*bllen];
1998 if (isym<jsym) continue;
1999 for(jbl=0; jbl<jbl_cnt; jbl++) {
2000 integer jtop = min(jblocks[jbl][1],i);
2001 for(j=jblocks[jbl][0]-1; j<jtop; j++) {
2002 for(k=blstart; k<blend; k++) {
2003 exc_a[j+ioff] += aos[k+j*bllen]*tmpai[k];
2004 exc_b[j+ioff] += aos[k+j*bllen]*tmpbi[k];
2005 }
2006 }
2007 }
2008 for(k=blstart; k<blend; k++) {
2009 exc_a[i+ioff] += aos[k+i*bllen]*tmpai[k]*0.5;
2010 exc_b[i+ioff] += aos[k+i*bllen]*tmpbi[k]*0.5;
2011 }
2012 }
2013 }
2014 }
2015 }
2016 }
2017
2018 static void
lin_resp_cb_b_gga_ab(DftIntegratorBl * grid,real * RESTRICT tmp,integer bllen,integer blstart,integer blend,LinResp_ab_BlData * data)2019 lin_resp_cb_b_gga_ab(DftIntegratorBl* grid, real * RESTRICT tmp,
2020 integer bllen, integer blstart, integer blend,
2021 LinResp_ab_BlData* data)
2022 {
2023 static const real MONER = -1.0;
2024 integer ibl, i, jbl, j, k, isym, ivec;
2025 integer bl_size = 4*DFT_BLLEN;
2026 real * RESTRICT aos = grid->atv;
2027 real * RESTRICT aox = grid->atv+bllen*inforb_.nbast;
2028 real * RESTRICT aoy = grid->atv+bllen*inforb_.nbast*2;
2029 real * RESTRICT aoz = grid->atv+bllen*inforb_.nbast*3;
2030 real (* RESTRICT rhowa)[4] = (real(*)[4])data->rhowa;
2031 real (* RESTRICT rhowb)[4] = (real(*)[4])data->rhowb;
2032 real (* RESTRICT vt3a)[4] = (real(*)[4])data->vta;
2033 real (* RESTRICT vt3b)[4] = (real(*)[4])data->vtb;
2034 real * RESTRICT exc_a = data->res_a;
2035 real * RESTRICT exc_b = data->res_b;
2036 real * RESTRICT tmpa = data->tmpa;
2037 real * RESTRICT tmpb = data->tmpb;
2038 FunDensProp dp = { 0 };
2039 FunSecondFuncDrv vxc;
2040 real znva, znvb;
2041 real zetaa, zetab, zetac;
2042 real fac0a, fac0b;
2043 real facra, facrb, facg;
2044 integer blen = bllen;
2045
2046 for(ivec=0; ivec<data->vecs_in_batch; ivec++) {
2047 /* compute vector of transformed densities and dens. gradients */
2048 FSYM2(getexp_blocked_gga)(&data->ksymop,data->kappa_a + ivec*inforb_.n2basx,
2049 grid->atv, grid->bas_bl_cnt,
2050 grid->basblocks, &grid->shl_bl_cnt, tmp, &blen, rhowa);
2051 FSYM2(getexp_blocked_gga)(&data->ksymop,data->kappa_b + ivec*inforb_.n2basx,
2052 grid->atv, grid->bas_bl_cnt,
2053 grid->basblocks, &grid->shl_bl_cnt, tmp, &blen, rhowb);
2054 if (data->trplet) FSYM(dscal)(&bl_size,&MONER,(real*)rhowb,&ONEI);
2055 for(i=blstart; i<blend; i++) {
2056 /* Compute density parameters at grid point */
2057 real weight = grid->weight[grid->curr_point+i];
2058 dp.rhoa = grid->r.ho.a[i];
2059 dp.rhob = grid->r.ho.b[i];
2060 dp.grada = sqrt(grid->g.rad.a [i][0]*grid->g.rad.a [i][0]+
2061 grid->g.rad.a [i][1]*grid->g.rad.a [i][1]+
2062 grid->g.rad.a [i][2]*grid->g.rad.a [i][2]);
2063 dp.gradb = sqrt(grid->g.rad.b [i][0]*grid->g.rad.b [i][0]+
2064 grid->g.rad.b [i][1]*grid->g.rad.b [i][1]+
2065 grid->g.rad.b [i][2]*grid->g.rad.b [i][2]);
2066 dp.gradab = grid->g.rad.a [i][0]*grid->g.rad.b [i][0]+
2067 grid->g.rad.a [i][1]*grid->g.rad.b [i][1]+
2068 grid->g.rad.a [i][2]*grid->g.rad.b [i][2];
2069 if(dp.grada<1e-20 || dp.rhoa<1e-20 || dp.gradb<1e-20 || dp.rhob<1e-20) {
2070 vt3a[i][0] = vt3a[i][1] = vt3a[i][2] = vt3a[i][3] = 0;
2071 vt3b[i][0] = vt3b[i][1] = vt3b[i][2] = vt3b[i][3] = 0;
2072 continue;
2073 }
2074 if(dp.grada<1e-20) dp.grada = 1e-20;
2075 if(dp.gradb<1e-20) dp.gradb = 1e-20;
2076 /* second derivatives calculation */
2077 drv2_clear(&vxc);
2078 selected_func->second(&vxc,weight,&dp);
2079 /* compute zetas */
2080 znva = 1.0/dp.grada;
2081 znvb = 1.0/dp.gradb;
2082 zetaa = rhowa[i][1]*grid->g.rad.a [i][0]+
2083 rhowa[i][2]*grid->g.rad.a [i][1]+
2084 rhowa[i][3]*grid->g.rad.a [i][2];
2085 zetaa = zetaa*znva;
2086 zetab = rhowb[i][1]*grid->g.rad.b [i][0]+
2087 rhowb[i][2]*grid->g.rad.b [i][1]+
2088 rhowb[i][3]*grid->g.rad.b [i][2];
2089 zetab = zetab*znvb;
2090 zetac = rhowa[i][1]*grid->g.rad.b [i][0]+
2091 rhowa[i][2]*grid->g.rad.b [i][1]+
2092 rhowa[i][3]*grid->g.rad.b [i][2]+
2093 rhowb[i][1]*grid->g.rad.a [i][0]+
2094 rhowb[i][2]*grid->g.rad.a [i][1]+
2095 rhowb[i][3]*grid->g.rad.a [i][2];
2096 /* rhoa and rhob dependent prefactors */
2097 fac0a = vxc.df2000*rhowa[i][0]+vxc.df1100*rhowb[i][0]+
2098 vxc.df1010*zetaa+vxc.df1001*zetab+vxc.df10001*zetac;
2099 fac0b = vxc.df0200*rhowb[i][0]+vxc.df1100*rhowa[i][0]+
2100 vxc.df0101*zetab+vxc.df0110*zetaa+vxc.df01001*zetac;
2101 vt3a[i][0] = 0.5*fac0a;
2102 vt3b[i][0] = 0.5*fac0b;
2103 /* grada and gradb dependent prefactors*/
2104 facra = vxc.df1010*rhowa[i][0]+vxc.df0110*rhowb[i][0]+
2105 vxc.df0020*zetaa+vxc.df0011*zetab+vxc.df00101*zetac;
2106 facrb = vxc.df0101*rhowb[i][0]+vxc.df1001*rhowa[i][0]+
2107 vxc.df0002*zetab+vxc.df0011*zetaa+vxc.df00011*zetac;
2108 facra = facra*znva;
2109 facrb = facrb*znvb;
2110 facg = vxc.df10001*rhowa[i][0]+vxc.df01001*rhowb[i][0]+
2111 vxc.df00101*zetaa+vxc.df00011*zetab+vxc.df00002*zetac;
2112 vt3a[i][1] = grid->g.rad.a[i][0]*facra+
2113 grid->g.rad.b[i][0]*facg+
2114 znva*vxc.df0010*rhowa[i][1]-
2115 znva*znva*zetaa*vxc.df0010*grid->g.rad.a[i][0]+
2116 rhowb[i][1]*vxc.df00001;
2117 vt3b[i][1] = grid->g.rad.b[i][0]*facrb+
2118 grid->g.rad.a[i][0]*facg+
2119 znvb*vxc.df0001*rhowb[i][1]-
2120 znvb*znvb*zetab*vxc.df0001*grid->g.rad.b[i][0]+
2121 rhowa[i][1]*vxc.df00001;
2122 vt3a[i][2] = grid->g.rad.a[i][1]*facra+
2123 grid->g.rad.b[i][1]*facg+
2124 znva*vxc.df0010*rhowa[i][2]-
2125 znva*znva*zetaa*vxc.df0010*grid->g.rad.a[i][1]+
2126 rhowb[i][2]*vxc.df00001;
2127 vt3b[i][2] = grid->g.rad.b[i][1]*facrb+
2128 grid->g.rad.a[i][1]*facg+
2129 znvb*vxc.df0001*rhowb[i][2]-
2130 znvb*znvb*zetab*vxc.df0001*grid->g.rad.b[i][1]+
2131 rhowa[i][2]*vxc.df00001;
2132 vt3a[i][3] = grid->g.rad.a[i][2]*facra+
2133 grid->g.rad.b[i][2]*facg+
2134 znva*vxc.df0010*rhowa[i][3]-
2135 znva*znva*zetaa*vxc.df0010*grid->g.rad.a[i][2]+
2136 rhowb[i][3]*vxc.df00001;
2137 vt3b[i][3] = grid->g.rad.b[i][2]*facrb+
2138 grid->g.rad.a[i][2]*facg+
2139 znvb*vxc.df0001*rhowb[i][3]-
2140 znvb*znvb*zetab*vxc.df0001*grid->g.rad.b[i][2]+
2141 rhowa[i][3]*vxc.df00001;
2142 }
2143
2144
2145 for(isym=0; isym<grid->nsym; isym++) {
2146 integer (*RESTRICT iblocks)[2] = BASBLOCK(grid,isym);
2147 integer ibl_cnt = grid->bas_bl_cnt[isym];
2148
2149 for(ibl=0; ibl<ibl_cnt; ibl++) {
2150 for(i=iblocks[ibl][0]-1; i<iblocks[ibl][1]; i++) {
2151 real *RESTRICT g0i = &aos[i*bllen];
2152 real *RESTRICT gxi = &aox[i*bllen];
2153 real *RESTRICT gyi = &aoy[i*bllen];
2154 real *RESTRICT gzi = &aoz[i*bllen];
2155 integer ioff = i*inforb_.nbast + ivec*inforb_.n2basx;
2156 integer jsym = inforb_.muld2h[data->ksymop-1][isym]-1;
2157 integer (*RESTRICT jblocks)[2] = BASBLOCK(grid,jsym);
2158 integer jbl_cnt = grid->bas_bl_cnt[jsym];
2159 for(k=blstart; k<blend; k++) {
2160 tmpa[k] = vt3a[k][0]*g0i[k]+
2161 vt3a[k][1]*gxi[k]+
2162 vt3a[k][2]*gyi[k]+
2163 vt3a[k][3]*gzi[k];
2164 tmpb[k] = vt3b[k][0]*g0i[k]+
2165 vt3b[k][1]*gxi[k]+
2166 vt3b[k][2]*gyi[k]+
2167 vt3b[k][3]*gzi[k];
2168 }
2169 for(jbl=0; jbl<jbl_cnt; jbl++) {
2170 integer jtop = jblocks[jbl][1];
2171 for(j=jblocks[jbl][0]-1; j<jtop; j++) {
2172 real *RESTRICT g0j = &aos[j*bllen];
2173 real sa = 0;
2174 real sb = 0;
2175 for(k=blstart; k<blend; k++) {
2176 sa += g0j[k]*tmpa[k];
2177 sb += g0j[k]*tmpb[k];
2178 }
2179 exc_a[j+ioff] += sa;
2180 exc_b[j+ioff] += sb;
2181 }
2182 }
2183 }
2184 }
2185 }
2186 }
2187 }
2188
2189
2190 void
FSYM2(dft_lin_respab_b)2191 FSYM2(dft_lin_respab_b)(integer *nosim, real* fmatc, real* fmato, real *cmo,
2192 real *zymat, integer *trplet, integer *ksymop, real* work,
2193 integer* lwork, integer* iprint)
2194 {
2195 static const integer MAX_VEC = 5;
2196 static const real DP5R = 0.5;
2197 static const real MDP5R = -0.5;
2198
2199 struct tms starttm, endtm; clock_t utm;
2200 real electrons = 0;
2201 LinResp_ab_BlData lr_data;
2202 real *runit;
2203 integer max_vecs;
2204 integer i, j, jvec, ivec;
2205 real averag;
2206 real * fmata, * fmatb;
2207
2208 /* parallel code insert */
2209 #if defined(VAR_MPI)
2210 dft_wake_slaves((DFTPropEvalMaster)FSYM2(dft_lin_respab_b));
2211 dft_lin_resp_sync_slaves(cmo,nosim,&zymat,trplet,ksymop,
2212 &work, *lwork);
2213 #endif
2214 /* start timer and allocate memory */
2215 times(&starttm);
2216 max_vecs = *nosim>MAX_VEC ? MAX_VEC : *nosim;
2217 lr_data.dmata = dal_malloc(2*inforb_.n2basx*sizeof(real));
2218 lr_data.dmatb = lr_data.dmata+inforb_.n2basx;
2219 lr_data.res_a = dal_malloc(inforb_.n2basx*sizeof(real)*max_vecs);
2220 lr_data.res_b = dal_malloc(inforb_.n2basx*sizeof(real)*max_vecs);
2221 lr_data.kappa_a = dal_malloc(inforb_.n2basx*sizeof(real)*max_vecs);
2222 lr_data.kappa_b = dal_malloc(inforb_.n2basx*sizeof(real)*max_vecs);
2223 lr_data.tmpa = dal_malloc(inforb_.nbast*DFT_BLLEN*sizeof(real));
2224 lr_data.tmpb = dal_malloc(inforb_.nbast*DFT_BLLEN*sizeof(real));
2225 if (selected_func->is_gga()) {
2226 lr_data.rhowa = dal_malloc(DFT_BLLEN*4*sizeof(real));
2227 lr_data.rhowb = dal_malloc(DFT_BLLEN*4*sizeof(real));
2228 lr_data.vta = dal_malloc(4*DFT_BLLEN*sizeof(real));
2229 lr_data.vtb = dal_malloc(4*DFT_BLLEN*sizeof(real));
2230 } else {
2231 lr_data.rhowa = dal_malloc(DFT_BLLEN*sizeof(real));
2232 lr_data.rhowb = dal_malloc(DFT_BLLEN*sizeof(real));
2233 lr_data.vta = dal_malloc(DFT_BLLEN*sizeof(real));
2234 lr_data.vtb = dal_malloc(DFT_BLLEN*sizeof(real));
2235 }
2236 lr_data.dtgaoa = dal_malloc(inforb_.nbast*sizeof(real));
2237 lr_data.dtgaob = dal_malloc(inforb_.nbast*sizeof(real));
2238 lr_data.trplet= *trplet;
2239 lr_data.ksymop= *ksymop;
2240 /* get density in AO basis*/
2241 FSYM2(dft_get_ao_dens_matab)(cmo,lr_data.dmatb,lr_data.dmata,work,lwork);
2242 FSYM(daxpy)(&inforb_.n2basx,&DP5R,lr_data.dmatb,&ONEI,lr_data.dmata,&ONEI);
2243 FSYM(dscal)(&inforb_.n2basx,&DP5R,lr_data.dmatb,&ONEI);
2244 /* get active density in MO basis */
2245 runit=dal_malloc(inforb_.n2ashx*sizeof(real));
2246 FSYM(dunit)(runit,&inforb_.nasht);
2247 /* MO alpha, beta Vxc contributions */
2248 integer sz1 = max_vecs*inforb_.n2orbx;
2249 integer sz1_int= sz1;
2250 fmata=dal_malloc(sz1_int*sizeof(real));
2251 fmatb=dal_malloc(sz1_int*sizeof(real));
2252 for(ivec=0; ivec<*nosim; ivec+=max_vecs) {
2253 integer sz;
2254 lr_data.vecs_in_batch = ivec + max_vecs > *nosim ? *nosim - ivec : max_vecs;
2255 sz = lr_data.vecs_in_batch * inforb_.n2basx;
2256 FSYM(dzero)(lr_data.kappa_a, &sz);
2257 FSYM(dzero)(lr_data.kappa_b, &sz);
2258 for(jvec=0; jvec<lr_data.vecs_in_batch; jvec++)
2259 FSYM(deq27)(cmo,zymat+(ivec+jvec)*inforb_.n2orbx,runit,
2260 lr_data.kappa_b+jvec*inforb_.n2basx,
2261 lr_data.kappa_a+jvec*inforb_.n2basx,
2262 work,lwork);
2263 dscal_(&sz,&DP5R,lr_data.kappa_b,&ONEI);
2264 FSYM(daxpy)(&sz,&ONER,lr_data.kappa_b,&ONEI,lr_data.kappa_a,&ONEI);
2265 FSYM(dzero)(lr_data.res_a, &sz);
2266 FSYM(dzero)(lr_data.res_b, &sz);
2267 electrons = dft_integrate_ao_bl(2, lr_data.dmata, work, lwork, iprint, 0,
2268 (DftBlockCallback)(selected_func->is_gga() ?
2269 lin_resp_cb_b_gga_ab : lin_resp_cb_b_lda_ab),
2270 &lr_data);
2271 #ifdef VAR_MPI
2272 dft_lin_respab_b_collect(lr_data.vecs_in_batch, lr_data.res_a, lr_data.res_b, work, *lwork);
2273 if(fmatc == NULL) continue;
2274 #endif
2275 FSYM(dzero)(fmata, &sz1);
2276 FSYM(dzero)(fmatb, &sz1);
2277 for(jvec=0; jvec<lr_data.vecs_in_batch; jvec++){
2278 for(i=0; i<inforb_.nbast; i++) {
2279 integer ioff = i*inforb_.nbast + jvec*inforb_.n2basx;
2280 for(j=0; j<i; j++) {
2281 integer joff = j*inforb_.nbast + jvec*inforb_.n2basx;
2282 averag = 0.5*(lr_data.res_a[i+joff] +
2283 lr_data.res_a[j+ioff]);
2284 lr_data.res_a[i+joff] = lr_data.res_a[j+ioff] = averag;
2285 averag = 0.5*(lr_data.res_b[i+joff] +
2286 lr_data.res_b[j+ioff]);
2287 lr_data.res_b[i+joff] = lr_data.res_b[j+ioff] = averag;
2288 }
2289 }
2290 /* transform to MO Fock matrix contribution */
2291 FSYM(lrao2mo)(cmo, &lr_data.ksymop, lr_data.res_a+jvec*inforb_.n2basx,
2292 fmata+jvec*inforb_.n2orbx, work, lwork);
2293 FSYM(lrao2mo)(cmo, &lr_data.ksymop, lr_data.res_b+jvec*inforb_.n2basx,
2294 fmatb+jvec*inforb_.n2orbx, work, lwork);
2295 }
2296 integer sz2 = inforb_.n2orbx*lr_data.vecs_in_batch;
2297 FSYM(daxpy)(&sz2,&ONER,fmata,&ONEI,fmatc+ivec*inforb_.n2orbx,&ONEI);
2298 if (inforb_.nasht > 0) {
2299 if (*trplet) {
2300 FSYM(daxpy)(&sz2,&MDP5R,fmatb,&ONEI,fmato+ivec*inforb_.n2orbx,&ONEI);
2301 } else {
2302 FSYM(daxpy)(&sz2,&DP5R,fmatb,&ONEI,fmato+ivec*inforb_.n2orbx,&ONEI);
2303 }
2304 FSYM(daxpy)(&sz2,&MDP5R,fmata,&ONEI,fmato+ivec*inforb_.n2orbx,&ONEI);
2305 }
2306 }
2307 free(runit);
2308 free(fmata);
2309 free(fmatb);
2310 free(lr_data.dmata);
2311 free(lr_data.res_a);
2312 free(lr_data.res_b);
2313 free(lr_data.kappa_a);
2314 free(lr_data.kappa_b);
2315 free(lr_data.rhowa);
2316 free(lr_data.rhowb);
2317 free(lr_data.vta);
2318 free(lr_data.vtb);
2319 free(lr_data.tmpa);
2320 free(lr_data.tmpb);
2321 free(lr_data.dtgaoa);
2322 free(lr_data.dtgaob);
2323
2324 if (*iprint>0) {
2325 times(&endtm);
2326 utm = endtm.tms_utime-starttm.tms_utime;
2327 fort_print(" Electrons: %f(%9.3g): LR-AB-DFT*%d evaluation time: %9.1f s",
2328 electrons, (double)(electrons-(integer)(electrons+0.5)), *nosim,
2329 utm/(double)sysconf(_SC_CLK_TCK));
2330 }
2331 }
2332
2333