1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30 /** @file rho-mat.cc Functions for density and gradient evaluation.
31     The density can be evaluated at entire batches of grid points.
32 */
33 
34 #ifdef HAVE_CONFIG_H
35 #include "config.h"
36 #endif
37 
38 #include <string.h>
39 #include <stdio.h>
40 
41 #include "realtype.h"
42 typedef ergo_real real;
43 
44 #include "mat_gblas.h"
45 #include "rho-mat.h"
46 
47 #if !defined(restrict)
48 #define restrict
49 #endif
50 
51 /** helper function for zeroing only used blocks of orbitals. Selected
52     values for the first index are looped over, and all allowed values
53     of the second index.
54 
55     @param tmp the matrix[nbast][nvclen]
56 
57     @param nblocks pointer to an integer containing number of nonzero
58     orbital blocks.
59     @param iblocks a set of nblocks integer pairs [a,b) defining the
60     range of first index to be zeroed.
61     @param ldaib not used
62     @param nvclen batch length - and the second dimension of tmp.
63  */
64 static void
zeroorbs(real * tmp,const int * nblocks,const int (* iblocks)[2],int ldaib,int nvclen)65 zeroorbs(real *tmp, const int *nblocks, const int (*iblocks)[2],
66          int ldaib, int nvclen)
67 {
68     /* DIMENSION TMP(NVCLEN,NBAST),NBLOCKS(NSYM),IBLOCKS(2,LDAIB,NSYM) */
69     int ibl, idx, k;
70     for(ibl=0; ibl<nblocks[0]; ibl++)
71         for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
72             real * tmpi = tmp + idx*nvclen;
73             for(k=0; k<nvclen; k++) tmpi[k] = 0.0;
74         }
75 }
76 
77 /** Computes the expectation value <o|dmat|o'> for a symmetric matrix
78   and given set of precomputed orbital values gao. Sparsity of gao as
79   determined with help of nblocks and iblocks is used to reduce the
80   computational effort.
81   @param dmat full square symmetric matrix
82   @param nbast size of dmat
83   @param gao orbital matrix[nbast][nvclen]. Set values are determied
84   by nblocks and iblocks, other values shall not be accessed.
85   @param nblocks number of nonzero row blocks in gao
86   @param iblocks ranges [a,b) of nonzero blocks in gao.
87   @param ldaib not used
88   @param tmp temporary matrix [nbast][nvclen]
89   @param nvclen batch length - number of columns in gao.
90   @param rho the vector[nvclen] where the computed expectation values
91    will be stored.
92 */
93 void
getrho_blocked_lda(int nbast,const real * dmat,const real * restrict gao,const int * nblocks,const int (* iblocks)[2],int ldaib,real * tmp,int nvclen,real * rho)94 getrho_blocked_lda(int nbast, const real * dmat, const real * restrict gao,
95                    const int* nblocks, const int (*iblocks)[2],
96                    int ldaib, real *tmp, int nvclen, real *rho)
97 {
98 /*
99       DIMENSION DMAT(NBAST,NBAST), GAO(NVCLEN,NBAST,*)
100       DIMENSION NBLOCKS(NSYM), IBLOCKS(2,LDAIB,NSYM), RHO(NVCLEN)
101       DIMENSION TMP(NVCLEN,NBAST)
102 */
103     int ibl, idx, jbl, k;
104 
105     /* dzero(TMP,NVCLEN*NBAST) */
106     zeroorbs(tmp, nblocks, iblocks, ldaib, nvclen);
107 
108     /* only first symmetry */
109 #if USE_BLAS_IN_XC
110     for(ibl=0; ibl<nblocks[0]; ibl++) {
111       static const ergo_real ONER = 1.0;
112       static const ergo_real HALF = 0.5;
113       int cColumns, sumCols;
114       for(jbl=0; jbl<ibl; jbl++) {
115 	cColumns = iblocks[jbl][1]-iblocks[jbl][0];
116 	sumCols = iblocks[ibl][1]-iblocks[ibl][0];
117 	mat::gemm("N", "N", &nvclen, &cColumns, &sumCols, &ONER,
118 		  gao+iblocks[ibl][0]*nvclen, &nvclen,
119 		  dmat+iblocks[jbl][0]*nbast + iblocks[ibl][0], &nbast, &ONER,
120 		  tmp + iblocks[jbl][0]*nvclen, &nvclen);
121       }
122       cColumns = iblocks[ibl][1]-iblocks[ibl][0];
123       sumCols = iblocks[ibl][1]-iblocks[ibl][0];
124       mat::symm("R", "U", &nvclen, &cColumns, &HALF,
125 		dmat+iblocks[ibl][0]*nbast + iblocks[ibl][0], &nbast,
126 		gao+iblocks[ibl][0]*nvclen, &nvclen, &ONER,
127 		tmp + iblocks[ibl][0]*nvclen, &nvclen);
128     }
129 #else /* USE_BLAS_IN_XC */
130     int jdx;
131     real d;
132     for(ibl=0; ibl<nblocks[0]; ibl++)
133         /* print *,"block ", IBL,IBLOCKS(1,IBL,ISYM),IBLOCKS(2,IBL,ISYM) */
134         for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
135             real * restrict tmpj;
136             const real * restrict gaoi = gao + idx*nvclen;
137             for(jbl=0; jbl<nblocks[0]; jbl++) {
138                 int jtop = iblocks[jbl][1] > idx ? idx : iblocks[jbl][1];
139                 for(jdx=iblocks[jbl][0]; jdx<jtop; jdx++) {
140                   d = dmat[idx +jdx*nbast];
141                     tmpj = tmp + jdx*nvclen;
142                     for(k=0; k<nvclen; k++)
143                         tmpj[k] += gaoi[k]*d;
144                 }
145             }
146             tmpj = tmp + idx*nvclen;
147             d = dmat[idx +idx*nbast]*0.5;
148             for(k=0; k<nvclen; k++)
149                 tmpj[k] += gaoi[k]*d;
150         }
151 #endif /* USE_BLAS_IN_XC */
152     memset(rho, 0, nvclen*sizeof(real));
153     for(ibl=0; ibl<nblocks[0]; ibl++)
154         for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
155             const real * restrict gaoi = gao + idx*nvclen;
156             const real * restrict tmpi = tmp + idx*nvclen;
157             for(k=0; k<nvclen; k++)
158                 rho[k] += gaoi[k]*tmpi[k]*2.0;
159         }
160 }
161 
162 /** Computes the expectation value <o|dmat|o'> and its derivatives for
163   a symmetric matrix and given set of precomputed orbital values and
164   their cartesian derivatives gao. Sparsity of gao as determined with
165   help of nblocks and iblocks is used to reduce the computational
166   effort.
167 
168   @param dmat full square symmetric matrix
169   @param nbast size of dmat
170 
171   @param gao orbital matrix[4][nbast][nvclen]. First block [0][][]
172   contains orbital values. Subsequent blocks - orbital derivatives wrt
173   x,y, and z coordinates. Set values are determied by nblocks and
174   iblocks, other values shall not be accessed.
175 
176   @param nblocks number of nonzero row blocks in gao
177   @param iblocks ranges [a,b) of nonzero blocks in gao.
178   @param ldaib not used
179   @param tmp temporary matrix [nbast][nvclen]
180   @param nvclen batch length - number of columns in gao.
181   @param rho the vector[nvclen] where the computed expectation values
182   will be stored.
183   @param grad a vector of triples where the computed gradient values
184    will be stored.
185 */
186 void
getrho_blocked_gga(int nbast,const real * dmat,const real * restrict gao,const int * nblocks,const int (* iblocks)[2],int ldaib,real * tmp,int nvclen,real * rho,real (* grad)[3])187 getrho_blocked_gga(int nbast, const real * dmat, const real * restrict gao,
188                    const int* nblocks, const int (*iblocks)[2],
189                    int ldaib, real *tmp, int nvclen,
190                    real *rho, real (*grad)[3])
191 {
192 /*
193       DIMENSION DMAT(NBAST,NBAST), GAO(NVCLEN,NBAST,*)
194       DIMENSION NBLOCKS(NSYM), IBLOCKS(2,LDAIB,NSYM), RHO(NVCLEN)
195       DIMENSION TMP(NVCLEN,NBAST)
196 */
197     int ibl, idx, jbl, k;
198 
199     /* dzero(TMP,NVCLEN*NBAST) */
200     zeroorbs(tmp, nblocks, iblocks, ldaib, nvclen);
201     /* only first symmetry */
202 #if USE_BLAS_IN_XC == 1
203     for(ibl=0; ibl<nblocks[0]; ibl++)
204       for(jbl=0; jbl<nblocks[0]; jbl++) {
205 	static const ergo_real ONER = 1.0;
206 	int cColumns = iblocks[jbl][1]-iblocks[jbl][0];
207 	int sumCols = iblocks[ibl][1]-iblocks[ibl][0];
208 	mat::gemm("N", "N", &nvclen, &cColumns, &sumCols, &ONER,
209 		  gao+iblocks[ibl][0]*nvclen, &nvclen,
210 		  dmat+iblocks[jbl][0]*nbast + iblocks[ibl][0],
211 		  &nbast, &ONER,
212 		  tmp + iblocks[jbl][0]*nvclen, &nvclen);
213       }
214 #else /* USE_BLAS_IN_XC */
215     int jdx;
216     real d;
217     for(ibl=0; ibl<nblocks[0]; ibl++)
218         /* print *,"block ", IBL,IBLOCKS(1,IBL,ISYM),IBLOCKS(2,IBL,ISYM) */
219         for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
220             const real * restrict gaoi = gao + idx*nvclen;
221             for(jbl=0; jbl<nblocks[0]; jbl++) {
222                 for(jdx=iblocks[jbl][0]; jdx<iblocks[jbl][1]; jdx++) {
223                     real * restrict tmpj = tmp + jdx*nvclen;
224                     d = dmat[idx +jdx*nbast];
225                     for(k=0; k<nvclen; k++)
226                         tmpj[k] += gaoi[k]*d;
227                 }
228             }
229         }
230 #endif /* USE_BLAS_IN_XC */
231 
232     memset(rho,  0,   nvclen*sizeof(real));
233     memset(grad, 0, 3*nvclen*sizeof(real));
234     for(ibl=0; ibl<nblocks[0]; ibl++)
235         for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
236             const real * restrict gaoi = gao + idx*nvclen;
237             const real * restrict tmpi = tmp + idx*nvclen;
238             for(k=0; k<nvclen; k++) {
239                 rho[k]     += gaoi[k]*tmpi[k];
240                 grad[k][0] += gaoi[k + nvclen*nbast]*tmpi[k]*2;
241                 grad[k][1] += gaoi[k + nvclen*nbast*2]*tmpi[k]*2;
242                 grad[k][2] += gaoi[k + nvclen*nbast*3]*tmpi[k]*2;
243             }
244         }
245 }
246 
247 /** Computes the expectation value <o|dmat|o'> for a nonsymmetric matrix
248   and given set of precomputed orbital values gao. Sparsity of gao as
249   determined with help of nblocks and iblocks is used to reduce the
250   computational effort.
251   @param dmat full square symmetric matrix
252   @param nbast size of dmat
253   @param gao orbital matrix[nbast][nvclen]. Set values are determied
254   by nblocks and iblocks, other values shall not be accessed.
255   @param nblocks number of nonzero row blocks in gao
256   @param iblocks ranges [a,b) of nonzero blocks in gao.
257   @param ldaib not used
258   @param tmp temporary matrix [nbast][nvclen]
259   @param nvclen batch length - number of columns in gao.
260   @param rho the vector[nvclen] where the computed expectation values
261    will be stored.
262 */
263 void
getexp_blocked_lda(int nbast,const real * dmat,const real * gao,const int * nblocks,const int (* iblocks)[2],int ldaib,real * tmp,int nvclen,real * rho)264 getexp_blocked_lda(int nbast, const real * dmat, const real * gao,
265                    const int* nblocks, const int (*iblocks)[2],
266                    int ldaib, real *tmp, int nvclen, real *rho)
267 {
268   /*
269     DIMENSION DMAT(NBAST,NBAST), GAO(NVCLEN,NBAST,*)
270     DIMENSION NBLOCKS(NSYM), IBLOCKS(2,LDAIB,NSYM), RHO(NVCLEN)
271     DIMENSION TMP(NVCLEN,NBAST)
272   */
273   int ibl, idx, jbl, jdx, k;
274   real d;
275 
276   /* dzero(TMP,NVCLEN*NBAST) */
277   zeroorbs(tmp, nblocks, iblocks, ldaib, nvclen);
278 
279   /* only first symmetry */
280   for(ibl=0; ibl<nblocks[0]; ibl++)
281     /* print *,"block ", IBL,IBLOCKS(1,IBL,ISYM),IBLOCKS(2,IBL,ISYM) */
282     for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
283       const real * gaoi = gao + idx*nvclen;
284       for(jbl=0; jbl<nblocks[0]; jbl++) {
285         for(jdx=iblocks[jbl][0]; jdx<iblocks[jbl][1]; jdx++) {
286           real *tmpj = tmp + jdx*nvclen;
287           d = dmat[idx +jdx*nbast];
288           for(k=0; k<nvclen; k++)
289             tmpj[k] += gaoi[k]*d;
290         }
291       }
292     }
293 
294   memset(rho, 0, nvclen*sizeof(real));
295   for(ibl=0; ibl<nblocks[0]; ibl++)
296     for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
297       const real * gaoi = gao + idx*nvclen;
298       const real * tmpi = tmp + idx*nvclen;
299       for(k=0; k<nvclen; k++)
300         rho[k] += gaoi[k]*tmpi[k];
301     }
302 }
303 
304 /** Computes the expectation value <o|dmat|o'> and its derivatives for
305   a nonsymmetric matrix and given set of precomputed orbital values
306   and their cartesian derivatives gao. Sparsity of gao as determined
307   with help of nblocks and iblocks is used to reduce the computational
308   effort.
309 
310   @param dmat full square symmetric matrix
311   @param nbast size of dmat
312 
313   @param gao orbital matrix[4][nbast][nvclen]. First block [0][][]
314   contains orbital values. Subsequent blocks - orbital derivatives wrt
315   x,y, and z coordinates. Set values are determied by nblocks and
316   iblocks, other values shall not be accessed.
317 
318   @param nblocks number of nonzero row blocks in gao
319   @param iblocks ranges [a,b) of nonzero blocks in gao.
320   @param ldaib not used
321   @param tmp temporary matrix [nbast][nvclen]
322   @param nvclen batch length - number of columns in gao.
323   @param rgrad a vector of quartets where the computed expectation
324   values and gradient values will be stored.  */
325 void
getexp_blocked_gga(int nbast,const real * dmat,const real * gao,const int * nblocks,const int (* iblocks)[2],int ldaib,real * tmp,int nvclen,real (* rgrad)[4])326 getexp_blocked_gga(int nbast, const real * dmat, const real * gao,
327                    const int* nblocks, const int (*iblocks)[2],
328                    int ldaib, real *tmp, int nvclen,
329                    real (*rgrad)[4])
330 {
331 /*
332       DIMENSION DMAT(NBAST,NBAST), GAO(NVCLEN,NBAST,*)
333       DIMENSION NBLOCKS(NSYM), IBLOCKS(2,LDAIB,NSYM), RHO(NVCLEN)
334       DIMENSION TMP(NVCLEN,NBAST)
335 */
336     int ibl, idx, jbl, jdx, k;
337     real d;
338 
339     /* dzero(TMP,NVCLEN*NBAST) */
340     zeroorbs(tmp, nblocks, iblocks, ldaib, nvclen);
341 
342     /* only first symmetry */
343     for(ibl=0; ibl<nblocks[0]; ibl++)
344         /* print *,"block ", IBL,IBLOCKS(1,IBL,ISYM),IBLOCKS(2,IBL,ISYM) */
345         for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
346             const real * gaoi = gao + idx*nvclen;
347             for(jbl=0; jbl<nblocks[0]; jbl++) {
348                 for(jdx=iblocks[jbl][0]; jdx<iblocks[jbl][1]; jdx++) {
349                     real *tmpj = tmp + jdx*nvclen;
350                     d = dmat[idx +jdx*nbast] + dmat[jdx +idx*nbast];
351                     for(k=0; k<nvclen; k++)
352                         tmpj[k] += gaoi[k]*d;
353                 }
354             }
355         }
356 
357     memset(rgrad, 0, 4*nvclen*sizeof(real));
358     for(ibl=0; ibl<nblocks[0]; ibl++)
359       for(idx=iblocks[ibl][0]; idx<iblocks[ibl][1]; idx++) {
360         const real * gaoi = gao + idx*nvclen;
361         const real * tmpi = tmp + idx*nvclen;
362         for(k=0; k<nvclen; k++) {
363           rgrad[k][0] += gaoi[k                 ]*tmpi[k]*0.5;
364           rgrad[k][1] += gaoi[k + nvclen*nbast  ]*tmpi[k];
365           rgrad[k][2] += gaoi[k + nvclen*nbast*2]*tmpi[k];
366           rgrad[k][3] += gaoi[k + nvclen*nbast*3]*tmpi[k];
367         }
368       }
369 }
370