1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /** @file lin_trans.cc
31
32 @brief Blocked DFT Linear Response contribution evaluator.
33
34 @author: Pawel Salek <em>responsible</em>
35 */
36
37 /* -*-mode:c; c-basic-offset:4; -*- */
38
39 #include "config.h"
40
41 #include <stdio.h>
42 #include <cmath>
43 #include <string.h>
44 #include <time.h>
45 #include <sys/times.h>
46 #include <unistd.h>
47
48 #include <pthread.h>
49 static pthread_mutex_t dft_prop_mutex = PTHREAD_MUTEX_INITIALIZER;
50
51 #include "aos.h"
52 #include "dft_common.h"
53 #include "functionals.h"
54 #include "integrator.h"
55 #include "output.h"
56 #include "grid_matrix.h"
57 #include "rho-mat.h"
58 #include "utilities.h"
59
60 /* restrict hints should not be necessary... */
61 #if !defined(restrict)
62 #define restrict
63 #endif
64
65 inline int
min(int a,int b)66 min(int a, int b) {
67 return a<b ? a : b;
68 }
69
70 typedef struct {
71 const real *kappa;
72 real *res;
73 real* vt; /* dimensioned [bllen] for LDA, [bllen][4] for GGA */
74 int trplet, nbast, vecs_in_batch;
75 } LinRespBlData;
76
77 static void
lin_resp_cb_b_lda(DftIntegratorBl * grid,real * restrict tmp,int bllen,int blstart,int blend,LinRespBlData * data)78 lin_resp_cb_b_lda(DftIntegratorBl* grid, real * restrict tmp,
79 int bllen, int blstart, int blend,
80 LinRespBlData* data)
81 {
82 const real * restrict aos = grid->atv;
83 real * restrict excmat = data->res;
84 real * restrict vt = data->vt; /* [bllen][4] */
85 int ibl, i, jbl, j, k, isym, ivec;
86 int nbast = data->nbast;
87 int n2basx = nbast*nbast;
88 FunDensProp dp = { 0 };
89
90 for(ivec=0; ivec<data->vecs_in_batch; ivec++) {
91 /* compute vector of transformed densities vt */
92 getexp_blocked_lda(nbast, data->kappa + ivec*n2basx, grid->atv,
93 grid->bas_bl_cnt,
94 grid->basblocks, grid->shl_bl_cnt,
95 tmp, bllen, vt);
96
97 for(i=blstart; i<blend; i++) {
98 SecondDrv vxc;
99 real weight = grid->weight[grid->curr_point+i];
100 dp.rhoa = dp.rhob = 0.5*grid->r.rho[i];
101 dftpot1_(&vxc, &weight, &dp, &data->trplet);
102 vt[i] = vxc.fRR*vt[i]*2;
103 }
104 for(isym=0; isym<grid->nsym; isym++) {
105 const real *vPot = vt;
106 int (*restrict iblocks)[2] = BASBLOCK(grid,isym);
107 int ibl_cnt = grid->bas_bl_cnt[isym];
108
109 for(ibl=0; ibl<ibl_cnt; ibl++)
110 for(i=iblocks[ibl][0]; i<iblocks[ibl][1]; i++) {
111 int ioff = i*bllen;
112 for(k=blstart; k<blend; k++)
113 tmp[k+ioff] = aos[k+ioff]*vPot[k];
114 }
115
116 for(ibl=0; ibl<ibl_cnt; ibl++) {
117 for(i=iblocks[ibl][0]; i<iblocks[ibl][1]; i++) {
118 int ioff = i*nbast + ivec*n2basx;
119 int jsym = 0; /* inforb_.muld2h[data->ksymop-1][isym]-1; */
120 int (*restrict jblocks)[2] = BASBLOCK(grid,jsym);
121 int jbl_cnt = grid->bas_bl_cnt[jsym];
122 real *restrict tmpi = &tmp[i*bllen];
123 if (isym<jsym) continue;
124 for(jbl=0; jbl<jbl_cnt; jbl++) {
125 int jtop = min(jblocks[jbl][1],i);
126 for(j=jblocks[jbl][0]; j<jtop; j++) {
127 ergo_real s = 0;
128 for(k=blstart; k<blend; k++)
129 s += aos[k+j*bllen]*tmpi[k];
130 excmat[j+ioff] += s;
131 }
132 }
133 for(k=blstart; k<blend; k++)
134 excmat[i+ioff] += aos[k+i*bllen]*tmpi[k]*0.5;
135 }
136 }
137 }
138 }
139 }
140
141 static void
lin_resp_cb_b_gga(DftIntegratorBl * grid,real * restrict tmp,int bllen,int blstart,int blend,LinRespBlData * data)142 lin_resp_cb_b_gga(DftIntegratorBl* grid, real * restrict tmp,
143 int bllen, int blstart, int blend,
144 LinRespBlData* data)
145 {
146 int ibl, i, jbl, j, k, isym, ivec;
147 int nbast = data->nbast;
148 int n2basx = nbast*nbast;
149 real (* restrict vt3)[4] = (real(*)[4])data->vt; /* [bllen][4] */
150 real * restrict aos = grid->atv;
151 real * restrict aox = grid->atv+bllen*nbast;
152 real * restrict aoy = grid->atv+bllen*nbast*2;
153 real * restrict aoz = grid->atv+bllen*nbast*3;
154 real * restrict excmat = data->res;
155 FunDensProp dp = { 0 };
156
157 for(ivec=0; ivec<data->vecs_in_batch; ivec++) {
158 /* compute vector of transformed densities and dens. gradients vt3 */
159 getexp_blocked_gga(nbast, data->kappa + ivec*n2basx, grid->atv,
160 grid->bas_bl_cnt,
161 grid->basblocks, grid->shl_bl_cnt,
162 tmp, bllen, vt3);
163 for(i=blstart; i<blend; i++) {
164 SecondDrv vxc;
165 real facr, facg;
166 real weight = grid->weight[grid->curr_point+i];
167 real ngrad = template_blas_sqrt(grid->g.grad[i][0]*grid->g.grad[i][0]+
168 grid->g.grad[i][1]*grid->g.grad[i][1]+
169 grid->g.grad[i][2]*grid->g.grad[i][2]);
170 real brg, brz, b0 = vt3[i][0];
171 if(ngrad<1e-15|| grid->r.rho[i]<1e-15) {
172 vt3[i][0] = vt3[i][1] = vt3[i][2] = vt3[i][3] = 0;
173 continue;
174 }
175 brg = (vt3[i][1]*grid->g.grad[i][0] +
176 vt3[i][2]*grid->g.grad[i][1] +
177 vt3[i][3]*grid->g.grad[i][2]);
178 brz = brg/ngrad;
179 dp. rhoa = dp. rhob = 0.5*grid->r.rho[i];
180 dp.grada = dp.gradb = 0.5*ngrad;
181 dp.gradab = dp.grada*dp.gradb;
182 dftpot1_(&vxc, &weight, &dp, &data->trplet);
183 facr = vxc.fRZ*b0 + (vxc.fZZ-vxc.fZ/ngrad)*brz + vxc.fZG*brg;
184 facr = facr/ngrad + (vxc.fRG*b0+vxc.fZG*brz +vxc.fGG*brg);
185 facg = vxc.fZ/ngrad + vxc.fG;
186 vt3[i][0] = vxc.fRR*b0 + vxc.fRZ*brz+ vxc.fRG*brg;
187 vt3[i][1] = (grid->g.grad[i][0]*facr + facg*vt3[i][1])*2;
188 vt3[i][2] = (grid->g.grad[i][1]*facr + facg*vt3[i][2])*2;
189 vt3[i][3] = (grid->g.grad[i][2]*facr + facg*vt3[i][3])*2;
190 }
191
192 for(isym=0; isym<grid->nsym; isym++) {
193 int (*restrict iblocks)[2] = BASBLOCK(grid,isym);
194 int ibl_cnt = grid->bas_bl_cnt[isym];
195
196 for(ibl=0; ibl<ibl_cnt; ibl++) {
197 for(i=iblocks[ibl][0]; i<iblocks[ibl][1]; i++) {
198 real *restrict g0i = &aos[i*bllen];
199 real *restrict gxi = &aox[i*bllen];
200 real *restrict gyi = &aoy[i*bllen];
201 real *restrict gzi = &aoz[i*bllen];
202 int ioff = i*nbast + ivec*n2basx;
203 int jsym = 0; /* inforb_.muld2h[data->ksymop-1][isym]-1; */
204 int (*restrict jblocks)[2] = BASBLOCK(grid,jsym);
205 int jbl_cnt = grid->bas_bl_cnt[jsym];
206 for(k=blstart; k<blend; k++)
207 tmp[k] = (vt3[k][0]*g0i[k] +
208 vt3[k][1]*gxi[k] +
209 vt3[k][2]*gyi[k] +
210 vt3[k][3]*gzi[k]);
211 for(jbl=0; jbl<jbl_cnt; jbl++) {
212 int jtop = jblocks[jbl][1];
213 for(j=jblocks[jbl][0]; j<jtop; j++) {
214 real *restrict g0j = &aos[j*bllen];
215 real s = 0;
216 for(k=blstart; k<blend; k++)
217 s += g0j[k]*tmp[k];
218 excmat[j+ioff] += s;
219 }
220 }
221 }
222 }
223 }
224 }
225 }
226
227 #if 0
228 static void
229 printmat(int n, const ergo_real *m, const char *name)
230 {
231 int i, j;
232 printf("Printing matrix %s\n", name);
233 for(i=0; i<n; i++) {
234 for(j=0; j<n; j++)
235 printf("%10.5f", m[i + j*n]);
236 puts("");
237 }
238 }
239 #endif
240
241 /** dft_lin_respao performs the transformation of given transition
242 density @param vec and the result is stored in @param trans_vec -
243 both of which are square matrix A ground state density @param dens
244 is required.
245 @param bis is the basis set description structure.
246 @param mol contains the molecule data (is this strictly needed?)
247 @param gss a structure describing the grid settings.
248 @param nThreads tells how many threads execute this section
249 (needed for grid).
250 */
251 EXTERN_C real
dft_lin_respao(const BasisInfoStruct & bis,const Molecule & mol,const Dft::GridParams & gss,const real * dens,const real * vec,real * trans_vec,int nThreads)252 dft_lin_respao(const BasisInfoStruct& bis, const Molecule& mol,
253 const Dft::GridParams& gss,
254 const real *dens, const real *vec, real* trans_vec,
255 int nThreads)
256 {
257 real electrons = 0;
258 LinRespBlData lr_data; /* linear response data */
259 int i, j, jvec;
260 int nbast = bis.noOfBasisFuncs;
261 int n2basx = nbast*nbast;
262 Dft::FullMatrix density(dens, nbast);
263 const Dft::FullMatrix *densPtr = &density;
264 Util::TimeMeter tm;
265
266 lr_data.vt = dal_new(DFT_MAX_BLLEN*4, real);
267 lr_data.kappa = vec;
268 lr_data.res = dal_new(nbast*nbast, real);
269 lr_data.trplet= 0; /* FIXME: should be 1 when finding excitations */
270 lr_data.vecs_in_batch = 1; /*nosim; */
271 lr_data.nbast = nbast;
272 memset(lr_data.res, 0, nbast*nbast*sizeof(real));
273 electrons = Dft::integrate(1, &densPtr, bis, mol, gss, nThreads,
274 (DftBlockCallback)
275 (selected_func->is_gga() ?
276 lin_resp_cb_b_gga : lin_resp_cb_b_lda),
277 &lr_data);
278 pthread_mutex_lock(&dft_prop_mutex);
279 for(jvec=0; jvec<lr_data.vecs_in_batch; jvec++){
280 for(i=0; i<nbast; i++) {
281 int ioff = i*nbast + jvec*n2basx;
282 for(j=0; j<i; j++) {
283 int joff = j*nbast + jvec*n2basx;
284 real averag = 0.5*(lr_data.res[i+joff] + lr_data.res[j+ioff]);
285 trans_vec[i+joff] += averag;
286 trans_vec[j+ioff] += averag;
287 }
288 trans_vec[i+ioff] += lr_data.res[i+ioff];
289 }
290 }
291 pthread_mutex_unlock(&dft_prop_mutex);
292 free(lr_data.res);
293 free(lr_data.vt);
294
295 if(nThreads<=1) {
296 int nElectrons = mol.getNumberOfElectrons();
297 do_output(LOG_CAT_INFO, LOG_AREA_LR,
298 "LR-DFT*%d finished. Electrons: %f(%9.3g) (serial)",
299 lr_data.vecs_in_batch, (double)electrons,
300 (double)((electrons-nElectrons)/nElectrons));
301 tm.print(LOG_AREA_DFT, __func__);
302 }
303 return electrons;
304 }
305
306 struct LinData {
307 const BasisInfoStruct *bis;
308 const Molecule *mol;
309 const Dft::GridParams *gss;
310 const real *density;
311 const real *inputVec;
312 real *transformedVec;
313 real electrons;
314 int nThreads;
315 };
316
317 static void*
dft_lin_resp_worker(void * data)318 dft_lin_resp_worker(void *data)
319 {
320 static const int LINRESP_ERROR = 1;
321 LinData *ld = (LinData*)data;
322 try {
323 ld->electrons =
324 dft_lin_respao(*ld->bis, *ld->mol, *ld->gss,
325 ld->density, ld->inputVec, ld->transformedVec,
326 ld->nThreads);
327 } catch(const char *s) {
328 do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
329 "dft_lin_resp_worker thread caught an exception '%s'", s);
330 return (void*)&LINRESP_ERROR;
331 } catch(const std::bad_alloc & e) {
332 do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
333 "dft_lin_resp_worker thread caught an exception '%s'", e.what());
334 return (void*)&LINRESP_ERROR;
335 } catch(...) {
336 do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
337 "dft_lin_resp_worker thread caught unexpected exception.");
338 return (void*)&LINRESP_ERROR;
339 }
340 return NULL;
341 }
342
343 EXTERN_C real
dft_lin_resp_mt(const BasisInfoStruct & bis,const Molecule & mol,const Dft::GridParams & gss,const real * dens,const real * vec,real * trans_vec)344 dft_lin_resp_mt(const BasisInfoStruct& bis, const Molecule& mol,
345 const Dft::GridParams& gss,
346 const real *dens, const real *vec, real* trans_vec)
347 {
348 int i, threads;
349 real electrons = 0;
350
351 Util::TimeMeter tm;
352
353 threads = dft_get_num_threads();
354 std::vector<LinData> data(threads);
355 std::vector<pthread_t> pids(threads);
356
357 for(i=0; i<threads; i++) {
358 data[i].bis = &bis;
359 data[i].mol = &mol;
360 data[i].gss = &gss;
361 data[i].density = dens;
362 data[i].inputVec = vec;
363 data[i].transformedVec = trans_vec;
364 data[i].nThreads = threads;
365 if (pthread_create(&pids[i], NULL, dft_lin_resp_worker, &data[i])) {
366 do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
367 "Creation of thread # %d failed\n", i);
368 if (i==0)
369 throw "No worker threads could be started";
370 else
371 break;
372 }
373 }
374 while (--i >= 0) {
375 pthread_join(pids[i], NULL);
376 electrons += data[i].electrons;
377 }
378 if(threads>1) {
379 int nElectrons = mol.getNumberOfElectrons();
380 do_output(LOG_CAT_INFO, LOG_AREA_LR,
381 "LR-DFT*%d finished. Electrons: %f(%9.3g) (mt)",
382 1, (double)electrons,
383 (double)((electrons-nElectrons)/nElectrons));
384 tm.print(LOG_AREA_DFT, __func__);
385 }
386 return electrons;
387 }
388