1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30 /** @file lin_trans.cc
31 
32     @brief Blocked DFT Linear Response contribution evaluator.
33 
34     @author: Pawel Salek <em>responsible</em>
35 */
36 
37 /* -*-mode:c; c-basic-offset:4; -*- */
38 
39 #include "config.h"
40 
41 #include <stdio.h>
42 #include <cmath>
43 #include <string.h>
44 #include <time.h>
45 #include <sys/times.h>
46 #include <unistd.h>
47 
48 #include <pthread.h>
49 static pthread_mutex_t dft_prop_mutex = PTHREAD_MUTEX_INITIALIZER;
50 
51 #include "aos.h"
52 #include "dft_common.h"
53 #include "functionals.h"
54 #include "integrator.h"
55 #include "output.h"
56 #include "grid_matrix.h"
57 #include "rho-mat.h"
58 #include "utilities.h"
59 
60 /* restrict hints should not be necessary... */
61 #if !defined(restrict)
62 #define restrict
63 #endif
64 
65 inline int
min(int a,int b)66 min(int a, int b) {
67   return a<b ? a : b;
68 }
69 
70 typedef struct {
71   const real *kappa;
72   real *res;
73   real* vt; /* dimensioned [bllen] for LDA, [bllen][4] for GGA */
74   int   trplet, nbast, vecs_in_batch;
75 } LinRespBlData;
76 
77 static void
lin_resp_cb_b_lda(DftIntegratorBl * grid,real * restrict tmp,int bllen,int blstart,int blend,LinRespBlData * data)78 lin_resp_cb_b_lda(DftIntegratorBl* grid, real * restrict tmp,
79                   int bllen, int blstart, int blend,
80                   LinRespBlData* data)
81 {
82   const real * restrict aos = grid->atv;
83   real * restrict excmat = data->res;
84   real * restrict vt = data->vt; /* [bllen][4] */
85   int ibl, i, jbl, j, k, isym, ivec;
86   int nbast = data->nbast;
87   int n2basx = nbast*nbast;
88   FunDensProp dp = { 0 };
89 
90   for(ivec=0; ivec<data->vecs_in_batch; ivec++) {
91     /* compute vector of transformed densities vt */
92     getexp_blocked_lda(nbast, data->kappa + ivec*n2basx, grid->atv,
93                        grid->bas_bl_cnt,
94                        grid->basblocks, grid->shl_bl_cnt,
95                        tmp, bllen, vt);
96 
97     for(i=blstart; i<blend; i++) {
98       SecondDrv vxc;
99       real weight = grid->weight[grid->curr_point+i];
100       dp.rhoa = dp.rhob = 0.5*grid->r.rho[i];
101       dftpot1_(&vxc, &weight, &dp, &data->trplet);
102       vt[i] = vxc.fRR*vt[i]*2;
103     }
104     for(isym=0; isym<grid->nsym; isym++) {
105       const real *vPot = vt;
106       int (*restrict iblocks)[2] = BASBLOCK(grid,isym);
107       int ibl_cnt = grid->bas_bl_cnt[isym];
108 
109       for(ibl=0; ibl<ibl_cnt; ibl++)
110         for(i=iblocks[ibl][0]; i<iblocks[ibl][1]; i++) {
111           int ioff = i*bllen;
112           for(k=blstart; k<blend; k++)
113             tmp[k+ioff] = aos[k+ioff]*vPot[k];
114         }
115 
116       for(ibl=0; ibl<ibl_cnt; ibl++) {
117         for(i=iblocks[ibl][0]; i<iblocks[ibl][1]; i++) {
118           int ioff = i*nbast + ivec*n2basx;
119           int jsym = 0; /* inforb_.muld2h[data->ksymop-1][isym]-1; */
120           int (*restrict jblocks)[2] = BASBLOCK(grid,jsym);
121           int jbl_cnt = grid->bas_bl_cnt[jsym];
122           real *restrict tmpi = &tmp[i*bllen];
123           if (isym<jsym) continue;
124           for(jbl=0; jbl<jbl_cnt; jbl++) {
125             int jtop = min(jblocks[jbl][1],i);
126             for(j=jblocks[jbl][0]; j<jtop; j++) {
127               ergo_real s = 0;
128               for(k=blstart; k<blend; k++)
129                 s += aos[k+j*bllen]*tmpi[k];
130               excmat[j+ioff] += s;
131             }
132           }
133           for(k=blstart; k<blend; k++)
134             excmat[i+ioff] += aos[k+i*bllen]*tmpi[k]*0.5;
135         }
136       }
137     }
138   }
139 }
140 
141 static void
lin_resp_cb_b_gga(DftIntegratorBl * grid,real * restrict tmp,int bllen,int blstart,int blend,LinRespBlData * data)142 lin_resp_cb_b_gga(DftIntegratorBl* grid, real * restrict tmp,
143                   int bllen, int blstart, int blend,
144                   LinRespBlData* data)
145 {
146   int ibl, i, jbl, j, k, isym, ivec;
147   int nbast = data->nbast;
148   int n2basx = nbast*nbast;
149   real (* restrict vt3)[4] = (real(*)[4])data->vt; /* [bllen][4] */
150   real * restrict aos = grid->atv;
151   real * restrict aox = grid->atv+bllen*nbast;
152   real * restrict aoy = grid->atv+bllen*nbast*2;
153   real * restrict aoz = grid->atv+bllen*nbast*3;
154   real * restrict excmat = data->res;
155   FunDensProp dp = { 0 };
156 
157   for(ivec=0; ivec<data->vecs_in_batch; ivec++) {
158     /* compute vector of transformed densities and dens. gradients vt3 */
159     getexp_blocked_gga(nbast, data->kappa + ivec*n2basx, grid->atv,
160                        grid->bas_bl_cnt,
161                        grid->basblocks, grid->shl_bl_cnt,
162                        tmp, bllen, vt3);
163     for(i=blstart; i<blend; i++) {
164       SecondDrv vxc;
165       real facr, facg;
166       real weight = grid->weight[grid->curr_point+i];
167       real ngrad  = template_blas_sqrt(grid->g.grad[i][0]*grid->g.grad[i][0]+
168 				       grid->g.grad[i][1]*grid->g.grad[i][1]+
169 				       grid->g.grad[i][2]*grid->g.grad[i][2]);
170       real brg, brz, b0 = vt3[i][0];
171       if(ngrad<1e-15|| grid->r.rho[i]<1e-15) {
172         vt3[i][0] = vt3[i][1] = vt3[i][2] = vt3[i][3] = 0;
173         continue;
174       }
175       brg = (vt3[i][1]*grid->g.grad[i][0] +
176              vt3[i][2]*grid->g.grad[i][1] +
177              vt3[i][3]*grid->g.grad[i][2]);
178       brz = brg/ngrad;
179       dp. rhoa = dp. rhob = 0.5*grid->r.rho[i];
180       dp.grada = dp.gradb = 0.5*ngrad;
181       dp.gradab = dp.grada*dp.gradb;
182       dftpot1_(&vxc, &weight, &dp, &data->trplet);
183       facr = vxc.fRZ*b0 + (vxc.fZZ-vxc.fZ/ngrad)*brz + vxc.fZG*brg;
184       facr = facr/ngrad + (vxc.fRG*b0+vxc.fZG*brz +vxc.fGG*brg);
185       facg = vxc.fZ/ngrad + vxc.fG;
186       vt3[i][0] = vxc.fRR*b0 + vxc.fRZ*brz+ vxc.fRG*brg;
187       vt3[i][1] = (grid->g.grad[i][0]*facr + facg*vt3[i][1])*2;
188       vt3[i][2] = (grid->g.grad[i][1]*facr + facg*vt3[i][2])*2;
189       vt3[i][3] = (grid->g.grad[i][2]*facr + facg*vt3[i][3])*2;
190     }
191 
192     for(isym=0; isym<grid->nsym; isym++) {
193       int (*restrict iblocks)[2] = BASBLOCK(grid,isym);
194       int ibl_cnt = grid->bas_bl_cnt[isym];
195 
196       for(ibl=0; ibl<ibl_cnt; ibl++) {
197         for(i=iblocks[ibl][0]; i<iblocks[ibl][1]; i++) {
198           real *restrict g0i = &aos[i*bllen];
199           real *restrict gxi = &aox[i*bllen];
200           real *restrict gyi = &aoy[i*bllen];
201           real *restrict gzi = &aoz[i*bllen];
202           int ioff = i*nbast + ivec*n2basx;
203           int jsym = 0; /* inforb_.muld2h[data->ksymop-1][isym]-1; */
204           int (*restrict jblocks)[2] = BASBLOCK(grid,jsym);
205           int jbl_cnt = grid->bas_bl_cnt[jsym];
206           for(k=blstart; k<blend; k++)
207             tmp[k] = (vt3[k][0]*g0i[k] +
208                       vt3[k][1]*gxi[k] +
209                       vt3[k][2]*gyi[k] +
210                       vt3[k][3]*gzi[k]);
211           for(jbl=0; jbl<jbl_cnt; jbl++) {
212             int jtop = jblocks[jbl][1];
213             for(j=jblocks[jbl][0]; j<jtop; j++) {
214               real *restrict g0j = &aos[j*bllen];
215               real s = 0;
216               for(k=blstart; k<blend; k++)
217                 s += g0j[k]*tmp[k];
218               excmat[j+ioff] += s;
219             }
220           }
221         }
222       }
223     }
224   }
225 }
226 
227 #if 0
228 static void
229 printmat(int n, const ergo_real *m, const char *name)
230 {
231   int i, j;
232   printf("Printing matrix %s\n", name);
233   for(i=0; i<n; i++) {
234     for(j=0; j<n; j++)
235       printf("%10.5f", m[i + j*n]);
236     puts("");
237   }
238 }
239 #endif
240 
241 /** dft_lin_respao performs the transformation of given transition
242     density @param vec and the result is stored in @param trans_vec -
243     both of which are square matrix A ground state density @param dens
244     is required.
245     @param bis is the basis set description structure.
246     @param mol contains the molecule data (is this strictly needed?)
247     @param gss a structure describing the grid settings.
248     @param nThreads tells how many threads execute this section
249     (needed for grid).
250 */
251 EXTERN_C real
dft_lin_respao(const BasisInfoStruct & bis,const Molecule & mol,const Dft::GridParams & gss,const real * dens,const real * vec,real * trans_vec,int nThreads)252 dft_lin_respao(const BasisInfoStruct& bis, const Molecule& mol,
253 	       const Dft::GridParams& gss,
254                const real *dens, const real *vec, real* trans_vec,
255                int nThreads)
256 {
257     real electrons = 0;
258     LinRespBlData lr_data; /* linear response data */
259     int i, j, jvec;
260     int nbast = bis.noOfBasisFuncs;
261     int n2basx = nbast*nbast;
262     Dft::FullMatrix density(dens, nbast);
263     const Dft::FullMatrix *densPtr = &density;
264     Util::TimeMeter tm;
265 
266     lr_data.vt    = dal_new(DFT_MAX_BLLEN*4, real);
267     lr_data.kappa = vec;
268     lr_data.res   = dal_new(nbast*nbast, real);
269     lr_data.trplet= 0;  /* FIXME: should be 1 when finding excitations */
270     lr_data.vecs_in_batch = 1;    /*nosim; */
271     lr_data.nbast = nbast;
272     memset(lr_data.res, 0, nbast*nbast*sizeof(real));
273     electrons = Dft::integrate(1, &densPtr, bis, mol, gss, nThreads,
274                                (DftBlockCallback)
275                                (selected_func->is_gga() ?
276                                 lin_resp_cb_b_gga : lin_resp_cb_b_lda),
277                                &lr_data);
278     pthread_mutex_lock(&dft_prop_mutex);
279     for(jvec=0; jvec<lr_data.vecs_in_batch; jvec++){
280       for(i=0; i<nbast; i++) {
281         int ioff = i*nbast + jvec*n2basx;
282         for(j=0; j<i; j++) {
283           int joff = j*nbast + jvec*n2basx;
284           real averag = 0.5*(lr_data.res[i+joff] + lr_data.res[j+ioff]);
285           trans_vec[i+joff] += averag;
286           trans_vec[j+ioff] += averag;
287         }
288         trans_vec[i+ioff] += lr_data.res[i+ioff];
289       }
290     }
291     pthread_mutex_unlock(&dft_prop_mutex);
292     free(lr_data.res);
293     free(lr_data.vt);
294 
295     if(nThreads<=1) {
296         int nElectrons = mol.getNumberOfElectrons();
297         do_output(LOG_CAT_INFO, LOG_AREA_LR,
298                   "LR-DFT*%d finished. Electrons: %f(%9.3g) (serial)",
299                   lr_data.vecs_in_batch, (double)electrons,
300                   (double)((electrons-nElectrons)/nElectrons));
301 	tm.print(LOG_AREA_DFT, __func__);
302     }
303     return electrons;
304 }
305 
306 struct LinData {
307     const BasisInfoStruct *bis;
308     const Molecule *mol;
309     const Dft::GridParams *gss;
310     const real     *density;
311     const real     *inputVec;
312     real           *transformedVec;
313     real electrons;
314     int nThreads;
315 };
316 
317 static void*
dft_lin_resp_worker(void * data)318 dft_lin_resp_worker(void *data)
319 {
320   static const int LINRESP_ERROR = 1;
321     LinData *ld = (LinData*)data;
322     try {
323       ld->electrons =
324         dft_lin_respao(*ld->bis, *ld->mol, *ld->gss,
325                        ld->density, ld->inputVec, ld->transformedVec,
326                        ld->nThreads);
327     } catch(const char *s) {
328       do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
329 		"dft_lin_resp_worker thread caught an exception '%s'", s);
330       return (void*)&LINRESP_ERROR;
331     } catch(const std::bad_alloc & e) {
332       do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
333 		"dft_lin_resp_worker thread caught an exception '%s'", e.what());
334       return (void*)&LINRESP_ERROR;
335     }  catch(...) {
336       do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
337 		"dft_lin_resp_worker thread caught unexpected exception.");
338       return (void*)&LINRESP_ERROR;
339     }
340     return NULL;
341 }
342 
343 EXTERN_C real
dft_lin_resp_mt(const BasisInfoStruct & bis,const Molecule & mol,const Dft::GridParams & gss,const real * dens,const real * vec,real * trans_vec)344 dft_lin_resp_mt(const BasisInfoStruct& bis, const Molecule& mol,
345 		const Dft::GridParams& gss,
346                 const real *dens, const real *vec, real* trans_vec)
347 {
348     int i, threads;
349     real electrons = 0;
350 
351     Util::TimeMeter tm;
352 
353     threads = dft_get_num_threads();
354     std::vector<LinData> data(threads);
355     std::vector<pthread_t> pids(threads);
356 
357     for(i=0; i<threads; i++) {
358         data[i].bis = &bis;
359         data[i].mol = &mol;
360         data[i].gss = &gss;
361         data[i].density = dens;
362         data[i].inputVec = vec;
363         data[i].transformedVec = trans_vec;
364         data[i].nThreads = threads;
365         if (pthread_create(&pids[i], NULL, dft_lin_resp_worker, &data[i])) {
366 	  do_output(LOG_CAT_ERROR, LOG_AREA_DFT,
367 		    "Creation of thread # %d failed\n", i);
368 	  if (i==0)
369 	    throw "No worker threads could be started";
370 	  else
371 	    break;
372 	}
373     }
374     while (--i >= 0) {
375         pthread_join(pids[i], NULL);
376         electrons += data[i].electrons;
377     }
378     if(threads>1) {
379         int nElectrons = mol.getNumberOfElectrons();
380         do_output(LOG_CAT_INFO, LOG_AREA_LR,
381                   "LR-DFT*%d finished. Electrons: %f(%9.3g) (mt)",
382                   1, (double)electrons,
383                   (double)((electrons-nElectrons)/nElectrons));
384 	tm.print(LOG_AREA_DFT, __func__);
385     }
386     return electrons;
387 }
388